#define DEBUG_TYPE "legalizer"

using namespace LegalizeActions;
using namespace MIPatternMatch;
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (LeftoverSize % EltSize != 0)
    return {-1, -1};

  return std::make_pair(NumParts, NumLeftover);
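// Worked example: breaking a 100-bit value into 32-bit narrow parts gives
// NumParts = 100 / 32 = 3 and LeftoverSize = 100 - 3 * 32 = 4, i.e. three
// s32 pieces plus one s4 leftover piece; {-1, -1} signals that no clean
// breakdown exists (the leftover is not a whole number of elements).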
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B, GISelKnownBits *KB)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
  if (isa<GIntrinsic>(MI))

  switch (Step.Action) {
  case Bitcast:
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    return lower(MI, Step.TypeIdx, Step.NewType);
void LegalizerHelper::insertParts(Register DstReg,

  assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
  AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
  return mergeMixedSubvectors(DstReg, AllRegs);

  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
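// Remerge strategy: vectors with a single leftover piece can simply be
// concatenated (mergeMixedSubvectors). Otherwise every part is unmerged
// down to the common GCD type, re-merged up to the LCM type, and then cut
// back down into DstReg via buildWidenedRemergeToDst.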
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,

  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  appendVectorElts(AllElts, Leftover);
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();

  if (SrcTy == GCDTy) {

  extractGCDType(Parts, GCDTy, SrcReg);
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  int NumOrigSrc = VRegs.size();

  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
    assert(PadStrategy == TargetOpcode::G_SEXT);

  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;

      SubMerge[J] = VRegs[Idx];
      AllMergePartsArePadding = false;

    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
      else if (PadStrategy == TargetOpcode::G_ZEXT)

      Remerge[I] = AllPadReg;

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];

    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];

  VRegs = std::move(Remerge);
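// Padding note: when there are fewer source registers than
// NumParts * NumSubParts, the tail is filled with PadReg. G_ZEXT pads with
// zero, G_ANYEXT pads with undef, and G_SEXT pads with a sign-extended
// image of the top piece, so the remerged LCM-sized value extends the
// original correctly under each strategy.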
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,

  if (DstTy == LCMTy) {

  UnmergeDefs[0] = DstReg;
  for (unsigned I = 1; I != NumDefs; ++I)
#define RTLIBCASE_INT(LibcallPrefix) \
  do { \
    switch (Size) { \
    case 32: return RTLIB::LibcallPrefix##32; \
    case 64: return RTLIB::LibcallPrefix##64; \
    case 128: return RTLIB::LibcallPrefix##128; \
    default: llvm_unreachable("unexpected size"); \
    } \
  } while (0)
#define RTLIBCASE(LibcallPrefix) \
  do { \
    switch (Size) { \
    case 32: return RTLIB::LibcallPrefix##32; \
    case 64: return RTLIB::LibcallPrefix##64; \
    case 80: return RTLIB::LibcallPrefix##80; \
    case 128: return RTLIB::LibcallPrefix##128; \
    default: llvm_unreachable("unexpected size"); \
    } \
  } while (0)
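// Example: with Size == 64, RTLIBCASE(LOG_F) expands to
// "return RTLIB::LOG_F64;", selecting the 64-bit flavor of the runtime
// routine; RTLIBCASE_INT is the integer variant without the 80-bit case.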
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||

  if (MI.getOpcode() == TargetOpcode::G_BZERO)

  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())

  Register PReg = Next->getOperand(0).getReg();

  if (Ret->getNumImplicitOperands() != 1)

  if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())

  Info.OrigRet = Result;

      (Result.Ty->isVoidTy() ||

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
           "Expected instr following MI to be return or debug inst?");
    } while (MI->getNextNode());
    Args.push_back({MO.getReg(), OpType, 0});
                    {MI.getOperand(0).getReg(), OpType, 0}, Args,

  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    Args.push_back({Reg, OpTy, 0});

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  }

  const char *Name = TLI.getLibcallName(RTLibcall);

  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);

      MI.getOperand(MI.getNumOperands() - 1).getImm() &&

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
           "Expected instr following MI to be return or debug inst?");
    } while (MI.getNextNode());
  unsigned Opc = MI.getOpcode();
  auto &AtomicMI = cast<GMemOperation>(MI);
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();

    return RTLIB::UNKNOWN_LIBCALL;

#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A) \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  }
  return RTLIB::UNKNOWN_LIBCALL;
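// The LC tables built with LCALL5(...) are indexed LC[SizeIdx][OrderingIdx]:
// five rows for 1/2/4/8/16-byte accesses, four columns for the _RELAX/_ACQ/
// _REL/_ACQ_REL flavors picked from the merged atomic ordering.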
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();

    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();

  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();

    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)

  const char *Name = TLI.getLibcallName(RTLibcall);

  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:

  if (FromType->isIntegerTy()) {
    // One of the following, chosen by the signedness of the conversion:
    Arg.Flags[0].setSExt();
    Arg.Flags[0].setZExt();

                    {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
  switch (MI.getOpcode()) {
  case TargetOpcode::G_GET_FPENV:
    RTLibcall = RTLIB::FEGETENV;
    break;
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_RESET_FPENV:
    RTLibcall = RTLIB::FESETENV;
    break;
  case TargetOpcode::G_GET_FPMODE:
    RTLibcall = RTLIB::FEGETMODE;
    break;
  case TargetOpcode::G_SET_FPMODE:
  case TargetOpcode::G_RESET_FPMODE:
    RTLibcall = RTLIB::FESETMODE;
    break;
  auto &Ctx = MF.getFunction().getContext();

  unsigned TempAddrSpace = DL.getAllocaAddrSpace();

                                     LocObserver, nullptr);

  auto &Ctx = MF.getFunction().getContext();

  unsigned TempAddrSpace = DL.getAllocaAddrSpace();

                                     LocObserver, nullptr);
static std::pair<RTLIB::Libcall, CmpInst::Predicate>

#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
  do { \
    switch (Size) { \
    case 32: return {RTLIB::LibcallPrefix##32, ICmpPred}; \
    case 64: return {RTLIB::LibcallPrefix##64, ICmpPred}; \
    case 128: return {RTLIB::LibcallPrefix##128, ICmpPred}; \
    default: llvm_unreachable("unexpected size"); \
    } \
  } while (0)
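// Soft-float comparisons return an integer, and the paired ICmpPred tells
// the caller how to test it: e.g. an ordered-equal FCMP becomes a libcall
// whose result is checked with ICMP_EQ against zero.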
  const auto Cond = Cmp->getCond();

      {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},

  Libcall != RTLIB::UNKNOWN_LIBCALL &&

  if (BuildLibcall(Libcall, ICmpPred, DstReg)) {

  const auto [OeqLibcall, OeqPred] =
  const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);

  const auto [UnoLibcall, UnoPred] =
  const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);

  const auto [OeqLibcall, OeqPred] =

  const auto [UnoLibcall, UnoPred] =

  if (NotOeq && NotUno)

  const auto [InversedLibcall, InversedPred] =
  if (!BuildLibcall(InversedLibcall,
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
  switch (MI.getOpcode()) {
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {

  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");

  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
                    {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);

    MI.eraseFromParent();

  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP: {
    LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
        {MI.getOperand(1).getReg(), HLTy, 0},
        {MI.getOperand(2).getReg(), ITy, 1}};
    Args[1].Flags[0].setSExt();
                                 Args, LocObserver, &MI);

  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    if (!FromTy || !ToTy)

  case TargetOpcode::G_FCMP: {
    MI.eraseFromParent();

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)

  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
    bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
                      LocObserver, TLI, IsSigned);

  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {

  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    MI.eraseFromParent();

  case TargetOpcode::G_GET_FPENV:
  case TargetOpcode::G_GET_FPMODE: {

  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_SET_FPMODE: {

  case TargetOpcode::G_RESET_FPENV:
  case TargetOpcode::G_RESET_FPMODE: {

  MI.eraseFromParent();
  switch (MI.getOpcode()) {
  case TargetOpcode::G_IMPLICIT_DEF: {

    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;

      MI.eraseFromParent();

    int NumParts = SizeOp0 / NarrowSize;

    for (int i = 0; i < NumParts; ++i)

    MI.eraseFromParent();

  case TargetOpcode::G_CONSTANT: {
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    int NumParts = TotalSize / NarrowSize;

    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;

    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;

    if (LeftoverBits != 0) {
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
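    // E.g. a 100-bit G_CONSTANT narrowed with s32 parts emits three s32
    // constants taken from bit offsets 0, 32 and 64 of Val, plus an s4
    // leftover built from Val.lshr(96).trunc(4); insertParts then stitches
    // the pieces back into the original wide register.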
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:

  case TargetOpcode::G_TRUNC: {
    LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");

    MI.eraseFromParent();

  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
  case TargetOpcode::G_FREEZE: {

    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {

    MI.eraseFromParent();
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:

  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:

  case TargetOpcode::G_EXTRACT:

  case TargetOpcode::G_INSERT:

  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();

    if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
      LoadMI.eraseFromParent();

  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    auto &MMO = LoadMI.getMMO();
    if (MemSize == NarrowSize) {
    } else if (MemSize < NarrowSize) {
    } else if (MemSize > NarrowSize) {

    if (isa<GZExtLoad>(LoadMI))

    LoadMI.eraseFromParent();

  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();

    int NumParts = SizeOp0 / NarrowSize;
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)

    if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
      StoreMI.eraseFromParent();
  case TargetOpcode::G_SELECT:

  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {

  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:

  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:

    switch (MI.getOpcode()) {
    case TargetOpcode::G_CTLZ:
    case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    case TargetOpcode::G_CTTZ:
    case TargetOpcode::G_CTTZ_ZERO_UNDEF:
    case TargetOpcode::G_CTPOP:

  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_PHI: {

    if (SizeOp0 % NarrowSize != 0)

    unsigned NumParts = SizeOp0 / NarrowSize;

    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {

    for (unsigned i = 0; i < NumParts; ++i) {
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));

    MI.eraseFromParent();
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;

  case TargetOpcode::G_ICMP: {

    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))

    for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
      auto LHS = std::get<0>(LHSAndRHS);
      auto RHS = std::get<1>(LHSAndRHS);

    for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
      auto LHS = std::get<0>(LHSAndRHS);
      auto RHS = std::get<1>(LHSAndRHS);
      LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
      buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                          TargetOpcode::G_ZEXT);

    assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");

    for (unsigned I = 2, E = Xors.size(); I < E; ++I)

    for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
      if (I == E - 1 && LHSLeftoverRegs.empty()) {
                                    LHSPartRegs[I], RHSPartRegs[I]);

    for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
      if (I == E - 1 && LHSLeftoverRegs.empty()) {
                          RHSLeftoverRegs[I]);
                          RHSLeftoverRegs[I]);
                                    LHSLeftoverRegs[I], RHSLeftoverRegs[I]);

    MI.eraseFromParent();
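    // For equality predicates the parts need no carry chain: each LHS/RHS
    // pair is XOR-ed, all XOR results are OR-ed together, and one compare
    // of the accumulated value against zero decides G_ICMP's result.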
  case TargetOpcode::G_FCMP:

  case TargetOpcode::G_SEXT_INREG: {

    int64_t SizeInBits = MI.getOperand(2).getImm();

    MO1.setReg(TruncMIB.getReg(0));

    if (SizeOp0 % NarrowSize != 0)
    int NumParts = SizeOp0 / NarrowSize;

    for (int i = 0; i < NumParts; ++i) {

    for (int i = 0; i < NumParts; ++i) {
      PartialExtensionReg = DstRegs.back();
      assert(PartialExtensionReg &&
             "Expected to visit partial extension before full");
      if (FullExtensionReg) {
      FullExtensionReg = DstRegs.back();
                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
      PartialExtensionReg = DstRegs.back();

    MI.eraseFromParent();
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)

    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,

    for (unsigned i = 0; i < NumParts; ++i) {
                                        {SrcRegs[NumParts - 1 - i]});

    MI.eraseFromParent();

  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {

  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FPTOSI_SAT:

  case TargetOpcode::G_FPEXT:

  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP:

  case TargetOpcode::G_VSCALE: {
    MI.eraseFromParent();
                                  unsigned OpIdx, unsigned ExtOpcode) {
  MO.setReg(ExtB.getReg(0));

  MO.setReg(ExtB.getReg(0));

                                  unsigned OpIdx, unsigned TruncOpcode) {

                                  unsigned OpIdx, unsigned ExtOpcode) {
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,

  auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
  if (DstTy.isVector())

  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      ResultReg = NextResult;

    if (WideSize > DstSize)
    else if (DstTy.isPointer())

    MI.eraseFromParent();

  const int GCD = std::gcd(SrcSize, WideSize);

  if (GCD == SrcSize) {
    for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)

  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)

  const int PartsPerGCD = WideSize / GCD;

  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {

  MI.eraseFromParent();
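// When WideTy covers the whole destination, the merge becomes shift-and-or
// accumulation: each source is extended, shifted to bit offset
// (I - 1) * PartSize, and OR-ed into ResultReg. Otherwise the sources are
// re-sliced through the GCD of the source and wide sizes and re-merged in
// PartsPerGCD groups.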
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();

  Register Dst0Reg = MI.getOperand(0).getReg();

    LLVM_DEBUG(
        dbgs() << "Not casting non-integral address space integer\n");

  for (int I = 1; I != NumDst; ++I) {

  MI.eraseFromParent();

    LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");

  const int NumUnmerge = Unmerge->getNumOperands() - 1;

  if (PartsPerRemerge == 1) {

    for (int I = 0; I != NumUnmerge; ++I) {

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        MIB.addDef(MI.getOperand(Idx).getReg());

      MIB.addUse(Unmerge.getReg(I));

    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;

      RemergeParts.clear();

  MI.eraseFromParent();
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();

  if (SrcTy.isVector() || DstTy.isVector())

  if (DstTy.isPointer())

  MI.eraseFromParent();

  LLT ShiftTy = SrcTy;

  MI.eraseFromParent();
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
  if (TypeIdx != 0 || WideTy.isVector())
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,

  std::optional<Register> CarryIn;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;

  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());

                     {LHSExt, RHSExt, *CarryIn})

  MI.eraseFromParent();
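// Overflow detection after widening: the wide result is truncated back and
// re-extended with the same signedness; if that round trip changes the
// value, the narrow operation would have overflowed, and that comparison
// becomes the carry-out.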
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;

                               {ShiftL, ShiftR}, MI.getFlags());

  MI.eraseFromParent();
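// Widening trick for saturating ops: both operands are shifted left into
// the top bits of the wide type, the wide saturating op then clips at the
// original MSB position, and the result is shifted back down (arithmetic
// shift for the signed forms, logical for the unsigned ones).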
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,

  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;

  LLT OverflowTy = MRI.getType(OriginalOverflow);

  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;

  if (WideMulCanOverflow)
                           {LeftOperand, RightOperand});

  if (WideMulCanOverflow) {

  MI.eraseFromParent();
  unsigned Opcode = MI.getOpcode();

  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");

  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");

  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");

  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {

    unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
                              Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
                          ? TargetOpcode::G_ANYEXT
                          : TargetOpcode::G_ZEXT;

    unsigned NewOpc = Opcode;
    if (NewOpc == TargetOpcode::G_CTTZ) {
      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;

    if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {

    if (Opcode == TargetOpcode::G_CTLZ) {

    MI.eraseFromParent();
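    // Compensation after widening the count ops: G_CTTZ any-extends and
    // plants a set bit at the old MSB position so the scan stops within the
    // narrow width (allowing G_CTTZ_ZERO_UNDEF); G_CTLZ zero-extends and
    // subtracts the WideSize - NarrowSize extra leading zeros it now sees.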
  case TargetOpcode::G_BSWAP: {

    MI.getOperand(0).setReg(DstExt);

  case TargetOpcode::G_BITREVERSE: {

    MI.getOperand(0).setReg(DstExt);

  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:

  case TargetOpcode::G_ABS:

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SHUFFLE_VECTOR:

  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:

  case TargetOpcode::G_SHL:

  case TargetOpcode::G_ROTR:
  case TargetOpcode::G_ROTL:

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:

  case TargetOpcode::G_SDIVREM:

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
                                                    : TargetOpcode::G_ZEXT;

  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:

  case TargetOpcode::G_UDIVREM:

  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
                           ? TargetOpcode::G_SEXT
                           : TargetOpcode::G_ZEXT;

  case TargetOpcode::G_SELECT:

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  case TargetOpcode::G_IS_FPCLASS:

  case TargetOpcode::G_SITOFP:

  case TargetOpcode::G_UITOFP:

  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:

    MI.getOperand(0).setReg(ExtReg);

    if (Opcode == TargetOpcode::G_FPTOSI_SAT) {

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:

  case TargetOpcode::G_STORE: {

    MI.setMemRefs(MF, {NewMMO});

                        TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;

  case TargetOpcode::G_CONSTANT: {
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

  case TargetOpcode::G_FCONSTANT: {

    MI.eraseFromParent();

  case TargetOpcode::G_IMPLICIT_DEF: {
  case TargetOpcode::G_BRCOND:

  case TargetOpcode::G_FCMP:

  case TargetOpcode::G_ICMP:

    unsigned ExtOpcode =
            ? TargetOpcode::G_SEXT
            : TargetOpcode::G_ZEXT;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {

  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
                     TargetOpcode::G_ANYEXT);

  case TargetOpcode::G_INSERT_VECTOR_ELT: {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)

  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    if (Opcode == TargetOpcode::G_STRICT_FLDEXP)

  case TargetOpcode::G_FFREXP: {

  case TargetOpcode::G_INTTOPTR:

  case TargetOpcode::G_PTRTOINT:

  case TargetOpcode::G_BUILD_VECTOR: {

    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)

  case TargetOpcode::G_SEXT_INREG:

  case TargetOpcode::G_PTRMASK: {

  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM: {

  case TargetOpcode::G_VSCALE: {
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

  case TargetOpcode::G_SPLAT_VECTOR: {

  case TargetOpcode::G_INSERT_SUBVECTOR: {
    MI.eraseFromParent();
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)

  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  LLT DstLLT = MRI.getType(DstReg);

  MI.eraseFromParent();

  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();

  if (DstTy.isVector()) {
    int NumDstElt = DstTy.getNumElements();

    LLT DstCastTy = DstEltTy;
    LLT SrcPartTy = SrcEltTy;

    if (NumSrcElt < NumDstElt) {
      SrcPartTy = SrcEltTy;
    } else if (NumSrcElt > NumDstElt) {
      DstCastTy = DstEltTy;

    MI.eraseFromParent();

  if (DstTy.isVector()) {
    MI.eraseFromParent();
                                  unsigned NewEltSize,
                                  unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  auto OffsetMask = B.buildConstant(
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
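// Worked example: extracting an s8 element Idx from a vector bitcast to s32
// elements (NewEltSize / OldEltSize == 4, Log2EltRatio == 2): the wide lane
// is Idx >> 2, and this helper returns the bit offset within that lane,
// (Idx & 3) << 3, i.e. the sub-element index times 8 bits.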
  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  unsigned OldNumElts = SrcVecTy.getNumElements();

  if (NewNumElts > OldNumElts) {

    if (NewNumElts % OldNumElts != 0)

    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      NewOps[I] = Elt.getReg(0);

    MI.eraseFromParent();

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);

    MI.eraseFromParent();
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  auto EltMask = B.buildConstant(
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
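// Net effect, as a formula:
//   result = (Target & ~(EltMask << Offset)) | (zext(Insert) << Offset)
// i.e. a classic read-modify-write bitfield insert.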
  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);

                           CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);

  MI.eraseFromParent();
  auto ConcatMI = dyn_cast<GConcatVectors>(&MI);

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
    return UnableToLegalize;

  for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
    MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))

  MI.eraseFromParent();

  auto ShuffleMI = cast<GShuffleVector>(&MI);
  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));

  MI.eraseFromParent();
  auto ES = cast<GExtractSubvector>(&MI);

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (DstTy == CastTy)

  if (CastEltSize < DstEltSize)

  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      SrcTyMinElts % AdjustAmt != 0)
  auto ES = cast<GInsertSubvector>(&MI);

  LLT DstTy = MRI.getType(Dst);
  LLT BigVecTy = MRI.getType(BigVec);
  LLT SubVecTy = MRI.getType(SubVec);

  if (DstTy == CastTy)

  if (CastEltSize < DstEltSize)

  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
  if (MemSizeInBits != MemStoreSizeInBits) {

  if (isa<GSExtLoad>(LoadMI)) {
  } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {

  if (DstTy != LoadTy)

  uint64_t LargeSplitSize, SmallSplitSize;

    SmallSplitSize = MemSizeInBits - LargeSplitSize;

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;

                                        LargeSplitSize / 8);
                                      SmallPtr, *SmallMMO);

  if (AnyExtTy == DstTy)

  if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {

  uint64_t LargeSplitSize, SmallSplitSize;

    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
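// Split layout: a non-power-of-two access such as 48 bits becomes a 32-bit
// piece (bit_floor) plus a 16-bit piece at byte offset LargeSplitSize / 8;
// an unaligned power-of-two access is instead halved into two equal pieces.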
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {

  case TargetOpcode::G_STORE: {

  case TargetOpcode::G_SELECT: {
    LLVM_DEBUG(
        dbgs() << "bitcast action not implemented for vector select\n");

  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {

  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_CONCAT_VECTORS:
  case TargetOpcode::G_SHUFFLE_VECTOR:
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
  case TargetOpcode::G_INSERT_SUBVECTOR:
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {

  case TargetOpcode::G_FCONSTANT:

  case TargetOpcode::G_BITCAST:

  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
                             {MI.getOperand(1), MI.getOperand(2)});
    MI.eraseFromParent();

  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:

  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:

  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {

    auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.removeOperand(1);

    if (Opcode == TargetOpcode::G_SMULH) {

  case TargetOpcode::G_FNEG: {
    auto [Res, SubByReg] = MI.getFirst2Regs();

    MI.eraseFromParent();

  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    auto [Res, LHS, RHS] = MI.getFirst3Regs();

    if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)

    MI.eraseFromParent();

  case TargetOpcode::G_FMAD:

  case TargetOpcode::G_FFLOOR:

  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    MI.eraseFromParent();

  case TargetOpcode::G_INTRINSIC_ROUND:

  case TargetOpcode::G_FRINT: {
    changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);

  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    MI.eraseFromParent();

  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
                                          **MI.memoperands_begin());
    MI.eraseFromParent();

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:

  case TargetOpcode::G_STORE:

  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:

    auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();

    MI.eraseFromParent();

    auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();

    MI.eraseFromParent();

    auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();

    MI.eraseFromParent();

    auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();

    MI.eraseFromParent();
  case G_MERGE_VALUES:

  case G_UNMERGE_VALUES:

  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    auto [DstReg, SrcReg] = MI.getFirst2Regs();

    MI.eraseFromParent();
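    // Without native support, G_SEXT_INREG lowers to the classic shift pair:
    // shift left by (DstWidth - SizeInBits), then arithmetic-shift right by
    // the same amount to smear the sign bit back across the top bits.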
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:

  case G_SHUFFLE_VECTOR:

  case G_VECTOR_COMPRESS:

  case G_DYN_STACKALLOC:

  case G_STACKRESTORE:

  case G_READ_REGISTER:
  case G_WRITE_REGISTER:

  case G_MEMCPY_INLINE:
    return lowerMemcpyInline(MI);
  unsigned AddrSpace = DL.getAllocaAddrSpace();

  Align StackTypeAlign =

  LLT IdxTy = B.getMRI()->getType(IdxReg);

    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
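  // Clamping a dynamic vector index: a power-of-two vector length uses a
  // single AND with NElts - 1; other lengths use umin(Idx, NElts - 1), so a
  // wild index cannot address past the stack temporary.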
4750 "Converting bits to bytes lost precision");
4757 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
4759 if (IdxTy != MRI.
getType(Index))
    std::initializer_list<unsigned> NonVecOpIndices) {
  if (MI.getNumMemOperands() != 0)

  LLT VecTy = MRI.getType(MI.getReg(0));

  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {

  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =

  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
  for (int i = 0; i < NumParts; ++i) {

  assert(NumLeftover == 1 && "expected exactly one leftover");

  for (unsigned i = 0; i < N; ++i) {
    else if (Op.isImm())
    else if (Op.isPredicate())
    std::initializer_list<unsigned> NonVecOpIndices) {
         "Non-compatible opcode or not specified non-vector operands");

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  for (unsigned i = 0; i < NumDefs; ++i) {

  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       ++UseIdx, ++UseNo) {
                        MI.getOperand(UseIdx));

    for (auto Reg : SplitPieces)

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {

    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      Defs.push_back(OutputOpsPieces[DstNo][i]);

    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
      Uses.push_back(InputOpsPieces[InputNo][i]);

    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      OutputRegs[DstNo].push_back(I.getReg(DstNo));

  for (unsigned i = 0; i < NumDefs; ++i)
    mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);

  for (unsigned i = 0; i < NumDefs; ++i)

  MI.eraseFromParent();
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {

    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));

  mergeMixedSubvectors(MI.getReg(0), OutputRegs);

  MI.eraseFromParent();
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();

  if (TypeIdx != 1 || NarrowTy == DstTy)

  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));

  MI.eraseFromParent();
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");

  if (NarrowTy == SrcTy)

  if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||

  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
    for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)

  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
       ++i, Offset += NumNarrowTyElts) {

  MI.eraseFromParent();

  assert(TypeIdx == 0 && "Bad type index");

  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();

  for (unsigned i = 0; i < NumParts; ++i) {
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());

  MI.eraseFromParent();
  auto [DstReg, SrcVec] = MI.getFirst2Regs();

  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
    InsertVal = MI.getOperand(2).getReg();

    IdxVal = MaybeCst->Value.getSExtValue();

    MI.eraseFromParent();

  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                  TargetOpcode::G_ANYEXT);

  int64_t PartIdx = IdxVal / NewNumElts;

        PartTy, VecParts[PartIdx], InsertVal, NewIdx);
    VecParts[PartIdx] = InsertPart.getReg(0);

  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);

  MI.eraseFromParent();
  bool IsLoad = isa<GLoad>(LdStMI);

  int NumLeftover = -1;

  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
    NumParts = NarrowRegs.size();
    NumLeftover = NarrowLeftoverRegs.size();

  auto MMO = LdStMI.getMMO();

                          unsigned NumParts, unsigned Offset) -> unsigned {
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
      unsigned ByteOffset = Offset / 8;

      ValRegs.push_back(Dst);

  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
              LeftoverTy, NarrowLeftoverRegs);
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:

  case G_FCANONICALIZE:

  case G_INTRINSIC_LRINT:
  case G_INTRINSIC_LLRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:

  case G_INTRINSIC_TRUNC:

  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:

  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ_ZERO_UNDEF:

  case G_ADDRSPACE_CAST:

  case G_STRICT_FLDEXP:

  case G_UNMERGE_VALUES:

  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");

  case G_CONCAT_VECTORS:

  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:

  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:

  case G_SHUFFLE_VECTOR:

  case G_INTRINSIC_FPTRUNC_ROUND:
5471 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5473 unsigned NewElemCount =
5479 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5484 for (
unsigned i = 0; i < SrcVRegs.
size(); i++)
5489 MI.eraseFromParent();
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);

  auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
      MI.getFirst3RegLLTs();

  if (DstTy != Src1Ty)
  if (DstTy != Src2Ty)

  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};

    unsigned InputUsed[2] = {-1U, -1U};
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      if (Input >= std::size(Inputs)) {

      Idx -= Input * NewElts;

      for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
        } else if (InputUsed[OpNo] == -1U) {
          InputUsed[OpNo] = Input;

      if (OpNo >= std::size(InputUsed)) {
        UseBuildVector = true;

    if (UseBuildVector) {
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        int Idx = Mask[FirstMaskIdx + MaskOffset];

        if (Input >= std::size(Inputs)) {

        Idx -= Input * NewElts;

            .buildExtractVectorElement(
                EltTy, Inputs[Input],

    } else if (InputUsed[0] == -1U) {

      Register Op0 = Inputs[InputUsed[0]];
                         : Inputs[InputUsed[1]];

  MI.eraseFromParent();
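  // Each narrow half of the shuffle is classified by how many of the four
  // split inputs its mask touches: at most two inputs become a narrow
  // G_SHUFFLE_VECTOR (or a plain copy/undef), while anything more forces
  // the element-by-element G_BUILD_VECTOR fallback (UseBuildVector).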
  auto &RdxMI = cast<GVecReduce>(MI);

  auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();

  unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();

  const unsigned NumParts =

  if (DstTy != NarrowTy)

    unsigned NumPartsLeft = NumParts;
    while (NumPartsLeft > 1) {
      for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
              .buildInstr(ScalarOpc, {NarrowTy},
                          {SplitSrcs[Idx], SplitSrcs[Idx + 1]})

      SplitSrcs = PartialResults;
      PartialResults.clear();
      NumPartsLeft = SplitSrcs.size();

    MI.eraseFromParent();

    for (unsigned Idx = 1; Idx < NumParts; ++Idx)

    MI.eraseFromParent();

  for (unsigned Part = 0; Part < NumParts; ++Part) {

    return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);

  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
                     {Acc, PartialReductions[Part]});
          .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})

  MI.eraseFromParent();
                                                     unsigned int TypeIdx,
  auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
      MI.getFirst3RegLLTs();
  if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||

  assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
          MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
         "Unexpected vecreduce opcode");
  unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
                           ? TargetOpcode::G_FADD
                           : TargetOpcode::G_FMUL;

  for (unsigned i = 0; i < NumParts; i++)

  MI.eraseFromParent();

                                              unsigned ScalarOpc) {

  while (SplitSrcs.size() > 1) {
    for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {

    SplitSrcs = std::move(PartialRdxs);

  MI.getOperand(1).setReg(SplitSrcs[0]);
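// Pairwise reduction tree: every pass combines neighboring registers with
// ScalarOpc, halving SplitSrcs until one vector remains, and the original
// reduction is re-pointed at that survivor. The SEQ_FADD/SEQ_FMUL variants
// above instead chain the parts in order to preserve strict FP semantics.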
                                             const LLT HalfTy, const LLT AmtTy) {

  MI.eraseFromParent();

  unsigned VTBits = 2 * NVTBits;

  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {

  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {

    if (Amt.ugt(VTBits)) {
    } else if (Amt.ugt(NVTBits)) {
    } else if (Amt == NVTBits) {

  MI.eraseFromParent();
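// Constant shift amounts split over a Lo/Hi register pair by range; for
// G_SHL: an amount beyond the full width produces zero, Amt > NVTBits gives
// Lo = 0 with Hi = Lo_in << (Amt - NVTBits), Amt == NVTBits gives Lo = 0
// with Hi = Lo_in, and smaller amounts OR together shifted halves of both
// parts.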
  if (DstEltSize % 2 != 0)

  const unsigned NewBitSize = DstEltSize / 2;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);

  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {

    if (MI.getOpcode() == TargetOpcode::G_LSHR) {

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);

  MI.eraseFromParent();
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {

  assert(Ty.isScalar() && "Expected scalar type to make neutral element for");

    "getNeutralElementForVecReduce called with invalid opcode!");
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_UMAX:

  case TargetOpcode::G_VECREDUCE_MUL:

  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_UMIN:

  case TargetOpcode::G_VECREDUCE_SMAX:

  case TargetOpcode::G_VECREDUCE_SMIN:

  case TargetOpcode::G_VECREDUCE_FADD:

  case TargetOpcode::G_VECREDUCE_FMUL:

  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
    assert(false && "getNeutralElementForVecReduce unimplemented for "
                    "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
  unsigned Opc = MI.getOpcode();

  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {

  case TargetOpcode::G_STORE:

  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCOPYSIGN:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_STRICT_FSUB:
  case TargetOpcode::G_STRICT_FMUL:
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {

  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FSHR:
  case TargetOpcode::G_FSHL: {

  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_EXTRACT:

  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_ABS:

  case TargetOpcode::G_SELECT: {
    auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();

    if (!CondTy.isScalar() ||
      MI.getOperand(1).setReg(ShufSplat.getReg(0));

    if (CondTy.isVector())

  case TargetOpcode::G_UNMERGE_VALUES:

  case TargetOpcode::G_PHI:

  case TargetOpcode::G_SHUFFLE_VECTOR:

  case TargetOpcode::G_BUILD_VECTOR: {

    for (auto Op : MI.uses()) {

    MI.eraseFromParent();

  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {

  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP: {

  case TargetOpcode::G_BITCAST: {

  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN: {

    auto NeutralElement = getNeutralElementForVecReduce(

                                       NeutralElement, Idx);

    MO.setReg(NewVec.getReg(0));
6286 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6288 unsigned MaskNumElts = Mask.size();
6292 if (MaskNumElts == SrcNumElts)
6295 if (MaskNumElts < SrcNumElts) {
6304 MI.getOperand(1).getReg(),
6305 MI.getOperand(2).getReg(), NewMask);
6306 MI.eraseFromParent();
6311 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6312 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6320 MOps1[0] = MI.getOperand(1).getReg();
6321 MOps2[0] = MI.getOperand(2).getReg();
6328 for (unsigned I = 0; I != MaskNumElts; ++I) {
6330 if (Idx >= static_cast<int>(SrcNumElts))
6331 Idx += PaddedMaskNumElts - SrcNumElts;
6336 if (MaskNumElts != PaddedMaskNumElts) {
6341 for (unsigned I = 0; I < MaskNumElts; ++I) {
6351 MI.eraseFromParent();
6357 unsigned int TypeIdx, LLT MoreTy) {
6358 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6360 unsigned NumElts = DstTy.getNumElements();
6363 if (DstTy.isVector() && Src1Ty.isVector() &&
6364 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6372 if (DstTy != Src1Ty || DstTy != Src2Ty)
6380 for (unsigned I = 0; I != NumElts; ++I) {
6382 if (Idx < static_cast<int>(NumElts))
6385 NewMask[I] = Idx - NumElts + WidenNumElts;
6390 MI.getOperand(1).getReg(),
6391 MI.getOperand(2).getReg(), NewMask);
6392 MI.eraseFromParent();
6401 unsigned SrcParts = Src1Regs.size();
6402 unsigned DstParts = DstRegs.size();
6404 unsigned DstIdx = 0;
6406 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
6407 DstRegs[DstIdx] = FactorSum;
6409 unsigned CarrySumPrevDstIdx;
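// Schoolbook long multiplication on NarrowTy-sized digits: destination
// part DstIdx sums the low halves of Src1[DstIdx - i] * Src2[i] plus the
// high halves of the products one digit lower, then folds in the carries
// produced while summing the previous digit.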
6412 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
6414 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
6415 i <= std::min(DstIdx, SrcParts - 1); ++i) {
6417 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
6421 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
6422 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
6424 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
6434 if (DstIdx != DstParts - 1) {
6436 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
6437 FactorSum = Uaddo.getReg(0);
6438 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
6439 for (unsigned i = 2; i < Factors.size(); ++i) {
6441 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
6442 FactorSum = Uaddo.getReg(0);
6444 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
6448 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
6449 for (unsigned i = 2; i < Factors.size(); ++i)
6450 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
6453 CarrySumPrevDstIdx = CarrySum;
6454 DstRegs[DstIdx] = FactorSum;
6471 unsigned Opcode = MI.getOpcode();
6472 unsigned OpO, OpE, OpF;
6474 case TargetOpcode::G_SADDO:
6475 case TargetOpcode::G_SADDE:
6476 case TargetOpcode::G_UADDO:
6477 case TargetOpcode::G_UADDE:
6478 case TargetOpcode::G_ADD:
6479 OpO = TargetOpcode::G_UADDO;
6480 OpE = TargetOpcode::G_UADDE;
6481 OpF = TargetOpcode::G_UADDE;
6482 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
6483 OpF = TargetOpcode::G_SADDE;
6485 case TargetOpcode::G_SSUBO:
6486 case TargetOpcode::G_SSUBE:
6487 case TargetOpcode::G_USUBO:
6488 case TargetOpcode::G_USUBE:
6489 case TargetOpcode::G_SUB:
6490 OpO = TargetOpcode::G_USUBO;
6491 OpE = TargetOpcode::G_USUBE;
6492 OpF = TargetOpcode::G_USUBE;
6493 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
6494 OpF = TargetOpcode::G_SSUBE;
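// OpO computes the lowest part (carry-out only), OpE chains the middle
// parts through the unsigned carry, and OpF computes the topmost part;
// only the top part needs the signed variant, since signed overflow is
// decided there.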
6501 unsigned NumDefs = MI.getNumExplicitDefs();
6502 Register Src1 = MI.getOperand(NumDefs).getReg();
6503 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
6506 CarryDst = MI.getOperand(1).getReg();
6507 if (MI.getNumOperands() == NumDefs + 3)
6508 CarryIn = MI.getOperand(NumDefs + 2).getReg();
6511 LLT LeftoverTy, DummyTy;
6513 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
6518 int NarrowParts = Src1Regs.size();
6519 Src1Regs.append(Src1Left);
6520 Src2Regs.append(Src2Left);
6523 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
6528 if (i == e - 1 && CarryDst)
6529 CarryOut = CarryDst;
6535 {Src1Regs[i], Src2Regs[i]});
6536 } else if (i == e - 1) {
6538 {Src1Regs[i], Src2Regs[i], CarryIn});
6541 {Src1Regs[i], Src2Regs[i], CarryIn});
6547 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
6548 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
6549 ArrayRef(DstRegs).drop_front(NarrowParts));
6551 MI.eraseFromParent();
6557 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
6565 if (Size % NarrowSize != 0)
6568 unsigned NumParts = Size / NarrowSize;
6569 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
6570 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
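// G_UMULH needs the full double-width product, so twice as many temporary
// parts are computed and only the upper NumParts are kept as the result.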
6576 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
6581 MI.eraseFromParent();
6591 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
6605 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
6621 if (SizeOp1 % NarrowSize != 0)
6623 int NumParts = SizeOp1 / NarrowSize;
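// Visit each narrow piece of the source: pieces entirely outside
// [OpStart, OpStart + OpSize) contribute nothing, an exactly aligned piece
// is forwarded as-is, and a partial overlap extracts just the covered
// segment below.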
6627 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
6631 uint64_t OpStart = MI.getOperand(2).getImm();
6633 for (int i = 0; i < NumParts; ++i) {
6634 unsigned SrcStart = i * NarrowSize;
6636 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
6639 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6647 int64_t ExtractOffset;
6649 if (OpStart < SrcStart) {
6651 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
6653 ExtractOffset = OpStart - SrcStart;
6654 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
6658 if (ExtractOffset != 0 || SegSize != NarrowSize) {
6670 else if (DstRegs.size() > 1)
6674 MI.eraseFromParent();
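// narrowScalarInsert is the dual of the extract case above: destination
// pieces outside the inserted range pass through unchanged, and
// overlapping pieces splice in the covered segment of the inserted value.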
6689 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
6692 SrcRegs.append(LeftoverRegs);
6696 uint64_t OpStart = MI.getOperand(3).getImm();
6698 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
6699 unsigned DstStart = I * NarrowSize;
6701 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
6709 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
6715 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
6723 int64_t ExtractOffset, InsertOffset;
6725 if (OpStart < DstStart) {
6727 ExtractOffset = DstStart - OpStart;
6728 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
6730 InsertOffset = OpStart - DstStart;
6733 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
6737 if (ExtractOffset != 0 || SegSize != OpSize) {
6757 MI.eraseFromParent();
6767 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6773 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6774 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6778 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6779 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6782 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6784 {Src0Regs[I], Src1Regs[I]});
6788 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6791 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6792 DstLeftoverRegs.push_back(Inst.getReg(0));
6795 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6796 LeftoverTy, DstLeftoverRegs);
6798 MI.eraseFromParent();
6808 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6815 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6816 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6817 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6819 MI.eraseFromParent();
6829 Register CondReg = MI.getOperand(1).getReg();
6841 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6842 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6846 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6847 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6850 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6852 CondReg, Src1Regs[I], Src2Regs[I]);
6856 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6858 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6862 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6863 LeftoverTy, DstLeftoverRegs);
6865 MI.eraseFromParent();
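// Narrowing CTLZ for a source of 2 * NarrowSize bits:
//   ctlz(Hi:Lo) = Hi == 0 ? NarrowSize + ctlz(Lo) : ctlz(Hi)
// The Hi operand may always use CTLZ_ZERO_UNDEF because that result is
// only selected when Hi is nonzero.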
6875 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6879 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6882 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6884 auto C_0 = B.buildConstant(NarrowTy, 0);
6886 UnmergeSrc.getReg(1), C_0);
6887 auto LoCTLZ = IsUndef ?
6888 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6889 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6890 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6891 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6892 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6893 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6895 MI.eraseFromParent();
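// Narrowing CTTZ mirrors CTLZ:
//   cttz(Hi:Lo) = Lo == 0 ? NarrowSize + cttz(Hi) : cttz(Lo)
// with CTTZ_ZERO_UNDEF safe on Lo since that path requires Lo != 0.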
6908 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6912 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6915 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6917 auto C_0 = B.buildConstant(NarrowTy, 0);
6919 UnmergeSrc.getReg(0), C_0);
6920 auto HiCTTZ = IsUndef ?
6921 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6922 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6923 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6924 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6925 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6926 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6928 MI.eraseFromParent();
6941 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6951 MI.eraseFromParent();
6971 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6972 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6973 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6974 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6976 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6978 MI.getOperand(2).setReg(Trunc.getReg(0));
6985 unsigned Opc = MI.getOpcode();
6994 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6997 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7001 case TargetOpcode::G_CTLZ: {
7002 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7005 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7013 MI.eraseFromParent();
7029 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7033 Op = MIBOp.getReg(0);
7038 MI.eraseFromParent();
7041 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7044 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7048 case TargetOpcode::G_CTTZ: {
7049 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7052 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7061 MI.eraseFromParent();
7072 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7073 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7077 MI.eraseFromParent();
7081 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7082 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7086 case TargetOpcode::G_CTPOP: {
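// Classic SWAR popcount: count bits within 2-bit fields, combine into
// 4-bit then 8-bit field counts, and finally gather the per-byte counts
// into the top byte, either by multiplying with a 0x0101...01 mask or,
// when G_MUL is not supported, by a shift-and-add chain.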
7097 auto C_1 = B.buildConstant(Ty, 1);
7098 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7100 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7101 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7102 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7106 auto C_2 = B.buildConstant(Ty, 2);
7107 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7109 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7110 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7111 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7112 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7119 auto C_4 = B.buildConstant(Ty, 4);
7120 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7121 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7123 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7124 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7126 assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
7132 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7134 auto IsMulSupported = [this](const LLT Ty) {
7135 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7138 if (IsMulSupported(Ty)) {
7139 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7140 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7142 auto ResTmp = B8Count;
7143 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7144 auto ShiftC = B.buildConstant(Ty, Shift);
7145 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7146 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7148 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7150 MI.eraseFromParent();
7163 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7171 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7180 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7181 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7204 MI.eraseFromParent();
7210 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7215 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7259 MI.eraseFromParent();
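// lowerFunnelShift prefers re-expressing the operation through the
// reversed opcode (G_FSHL <-> G_FSHR) when the target supports it, and
// falls back to an expansion with two ordinary shifts otherwise.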
7273 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7274 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7278 return lowerFunnelShiftAsShifts(MI);
7282 if (Result == UnableToLegalize)
7283 return lowerFunnelShiftAsShifts(MI);
7288 auto [Dst, Src] = MI.getFirst2Regs();
7302 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7316 {UnmergeSrc.getReg(0)});
7318 {UnmergeSrc.getReg(1)});
7323 MI.eraseFromParent();
7340 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7344 LLT DstTy = MRI.getType(DstReg);
7345 LLT SrcTy = MRI.getType(SrcReg);
7365 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
7379 MI.eraseFromParent();
7388 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7390 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7391 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7394 MI.eraseFromParent();
7399 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
7401 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
7402 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
7407 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
7410 return lowerRotateWithReverseRotate(MI);
7413 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7414 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
7415 bool IsFShLegal = false;
7416 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
7417 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
7421 MI.eraseFromParent();
7426 return buildFunnelShift(FShOpc, Dst, Src, Amt);
7429 return buildFunnelShift(RevFsh, Dst, Src, Amt);
7434 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
7435 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
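// Power-of-two bitwidth: rot(x, Amt) = shift(x, Amt & (w - 1)) |
// revshift(x, -Amt & (w - 1)). Otherwise the amount is reduced modulo w
// with urem and the reverse side is pre-shifted by one so no shift amount
// ever equals the bitwidth.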
7436 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
7442 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
7443 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
7445 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
7451 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
7452 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
7454 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
7456 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
7460 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
7461 MI.eraseFromParent();
7469 auto [Dst, Src] = MI.getFirst2Regs();
7519 MI.eraseFromParent();
7527 auto [Dst, Src] = MI.getFirst2Regs();
7554 MI.eraseFromParent();
7562 auto [Dst, Src] = MI.getFirst2Regs();
7579 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
7591 MI.eraseFromParent();
7596 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7602 MI.eraseFromParent();
7623 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7633 MI.eraseFromParent();
7658 MI.eraseFromParent();
7666 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7670 if (SrcTy != S64 && SrcTy != S32)
7672 if (DstTy != S32 && DstTy != S64)
7701 MI.eraseFromParent();
7706 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7765 MI.eraseFromParent();
7771 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7773 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
7774 unsigned SatWidth = DstTy.getScalarSizeInBits();
7778 APInt MinInt, MaxInt;
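// Saturating FP-to-int: when MinInt/MaxInt are exactly representable in
// the source FP type, clamp the input before converting; otherwise
// convert first and patch the out-of-range cases with compares and
// selects. NaN inputs are mapped to zero via the Src != Src compare below.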
7801 if (AreExactFloatBounds) {
7818 MI.eraseFromParent();
7825 DstTy.changeElementSize(1), Src, Src);
7828 MI.eraseFromParent();
7855 MI.eraseFromParent();
7863 DstTy.changeElementSize(1), Src, Src);
7865 MI.eraseFromParent();
7875 auto [Dst, Src] = MI.getFirst2Regs();
7883 unsigned Flags = MI.getFlags();
7886 MI.eraseFromParent();
7890 const unsigned ExpMask = 0x7ff;
7891 const unsigned ExpBiasf64 = 1023;
7892 const unsigned ExpBiasf16 = 15;
7981 MI.eraseFromParent();
7987 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7998 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8003 MI.eraseFromParent();
8009 case TargetOpcode::G_SMIN:
8011 case TargetOpcode::G_SMAX:
8013 case TargetOpcode::G_UMIN:
8015 case TargetOpcode::G_UMAX:
8023 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8031 MI.eraseFromParent();
8071 unsigned BoolExtOp =
8078 MI.eraseFromParent();
8084 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8085 const int Src0Size = Src0Ty.getScalarSizeInBits();
8086 const int Src1Size = Src1Ty.getScalarSizeInBits();
8096 if (Src0Ty == Src1Ty) {
8098 } else if (Src0Size > Src1Size) {
8113 unsigned Flags = MI.getFlags();
8120 MI.eraseFromParent();
8126 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
8127 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
8129 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8149 MI.eraseFromParent();
8157 unsigned Flags = MI.getFlags();
8162 MI.eraseFromParent();
8168 auto [DstReg, X] = MI.getFirst2Regs();
8169 const unsigned Flags = MI.getFlags();
8196 MI.eraseFromParent();
8201 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8202 unsigned Flags = MI.getFlags();
8214 SrcReg, Zero, Flags);
8216 SrcReg, Trunc, Flags);
8221 MI.eraseFromParent();
8227 const unsigned NumOps = MI.getNumOperands();
8228 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8229 unsigned PartSize = Src0Ty.getSizeInBits();
8234 for (unsigned I = 2; I != NumOps; ++I) {
8235 const unsigned Offset = (I - 1) * PartSize;
8240 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8246 ResultReg = NextResult;
8249 if (DstTy.isPointer()) {
8251 DstTy.getAddressSpace())) {
8259 MI.eraseFromParent();
8265 const unsigned NumDst = MI.getNumOperands() - 1;
8266 Register SrcReg = MI.getOperand(NumDst).getReg();
8267 Register Dst0Reg = MI.getOperand(0).getReg();
8282 unsigned Offset = DstSize;
8283 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
8289 MI.eraseFromParent();
8308 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
8309 InsertVal = MI.getOperand(2).getReg();
8323 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
8329 MI.eraseFromParent();
8334 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
8352 int64_t Offset = IdxVal * EltBytes;
8371 MI.eraseFromParent();
8377 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
8378 MI.getFirst3RegLLTs();
8386 for (int Idx : Mask) {
8388 if (!Undef.isValid())
8394 if (Src0Ty.isScalar()) {
8397 int NumElts = Src0Ty.getNumElements();
8398 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
8399 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
8406 if (DstTy.isScalar())
8410 MI.eraseFromParent();
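// G_VECTOR_COMPRESS walks the source lanes with a running output
// position (OutPos) that only advances when the corresponding mask bit is
// set; with a passthru vector the remaining tail keeps the passthru
// contents.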
8416 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
8417 MI.getFirst4RegLLTs();
8419 if (VecTy.isScalableVector())
8444 std::optional<APInt> PassthruSplatVal =
8447 if (PassthruSplatVal.has_value()) {
8450 } else if (HasPassthru) {
8462 unsigned NumElmts = VecTy.getNumElements();
8463 for (unsigned I = 0; I < NumElmts; ++I) {
8478 if (HasPassthru && I == NumElmts - 1) {
8484 {OutPos, EndOfVector});
8497 MI.eraseFromParent();
8514 if (Alignment > Align(1)) {
8526 const auto &MF = *MI.getMF();
8527 const auto &TFI = *MF.getSubtarget().getFrameLowering();
8532 Register AllocSize = MI.getOperand(1).getReg();
8543 MI.eraseFromParent();
8554 MI.eraseFromParent();
8565 MI.eraseFromParent();
8571 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
8572 unsigned Offset = MI.getOperand(2).getImm();
8577 unsigned DstSize = DstTy.getSizeInBits();
8579 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
8586 for (unsigned Idx = Offset / SrcEltSize;
8590 if (SubVectorElts.size() == 1)
8595 MI.eraseFromParent();
8600 if (DstTy.isScalar() &&
8603 LLT SrcIntTy = SrcTy;
8617 MI.eraseFromParent();
8625 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
8637 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
8650 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
8652 DstElts.push_back(UnmergeInsertSrc.getReg(i));
8665 MI.eraseFromParent();
8679 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
8683 LLT IntDstTy = DstTy;
8709 MI.eraseFromParent();
8715 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
8716 MI.getFirst4RegLLTs();
8717 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
8720 LLT BoolTy = Dst1Ty;
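// Signed overflow test: an addition wraps exactly when (Res < LHS)
// disagrees with (RHS < 0); a subtraction wraps when (Res < LHS)
// disagrees with (RHS > 0).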
8739 auto ResultLowerThanLHS =
8747 MI.eraseFromParent();
8754 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8759 switch (MI.getOpcode()) {
8762 case TargetOpcode::G_UADDSAT:
8765 BaseOp = TargetOpcode::G_ADD;
8767 case TargetOpcode::G_SADDSAT:
8770 BaseOp = TargetOpcode::G_ADD;
8772 case TargetOpcode::G_USUBSAT:
8775 BaseOp = TargetOpcode::G_SUB;
8777 case TargetOpcode::G_SSUBSAT:
8780 BaseOp = TargetOpcode::G_SUB;
8823 MI.eraseFromParent();
8829 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8834 unsigned OverflowOp;
8835 switch (MI.getOpcode()) {
8838 case TargetOpcode::G_UADDSAT:
8841 OverflowOp = TargetOpcode::G_UADDO;
8843 case TargetOpcode::G_SADDSAT:
8846 OverflowOp = TargetOpcode::G_SADDO;
8848 case TargetOpcode::G_USUBSAT:
8851 OverflowOp = TargetOpcode::G_USUBO;
8853 case TargetOpcode::G_SSUBSAT:
8856 OverflowOp = TargetOpcode::G_SSUBO;
8862 Register Tmp = OverflowRes.getReg(0);
8863 Register Ov = OverflowRes.getReg(1);
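// When the overflow bit Ov is set the result saturates: unsigned adds
// clamp to all-ones, unsigned subs to zero, and the signed forms select
// INT_MAX or INT_MIN depending on the sign of the wrapped value.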
8889 MI.eraseFromParent();
8895 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
8896 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
8897 "Expected shlsat opcode!");
8898 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
8899 auto [Res, LHS, RHS] = MI.getFirst3Regs();
8921 MI.eraseFromParent();
8926 auto [Dst, Src] = MI.getFirst2Regs();
8929 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
8938 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
8940 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
8952 Res.getInstr()->getOperand(0).setReg(Dst);
8954 MI.eraseFromParent();
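// SwapN exchanges adjacent N-bit groups selected by MaskLoNTo0: the
// masked bits move down by N while their neighbours move up into the
// masked positions, one step of the logarithmic bit reversal used by
// lowerBitreverse.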
8961 const LLT Ty = Dst.getLLTTy(*B.getMRI());
8964 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
8965 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
8966 return B.buildOr(Dst, LHS, RHS);
8971 auto [Dst, Src] = MI.getFirst2Regs();
8999 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9019 MI.eraseFromParent();
9027 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9028 int NameOpIdx = IsRead ? 1 : 0;
9029 int ValRegIndex = IsRead ? 0 : 1;
9031 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9033 const MDString *RegStr = cast<MDString>(
9034 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9045 MI.eraseFromParent();
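// Lower [SU]MULH by extending both operands to twice the width,
// multiplying, shifting the product right by the original bitwidth, and
// truncating the high half back to the result type.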
9051 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9052 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9061 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9067 MI.eraseFromParent();
9073 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9078 MI.eraseFromParent();
9083 MI.eraseFromParent();
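// G_IS_FPCLASS is decomposed into integer tests on the value's bit
// pattern: |x| above the infinity pattern means NaN (the quiet bit
// distinguishes qNaN from sNaN), and exponent-field compares classify
// zero, subnormal, normal and infinite values.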
9102 APInt ExpMask = Inf;
9120 LLT DstTyCopy = DstTy;
9135 Mask &= ~fcPosFinite;
9142 Mask &= ~fcNegFinite;
9153 Mask &= ~PartialCheck;
9162 else if (PartialCheck == fcZero)
9181 appendToRes(SubnormalRes);
9188 else if (PartialCheck == fcInf)
9201 if (PartialCheck == fcNan) {
9205 } else if (PartialCheck == fcQNan) {
9215 Abs, InfWithQnanBitC);
9223 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
9226 APInt MaxExpMinusOne = ExpMask - ExpLSB;
9237 appendToRes(NormalRes);
9241 MI.eraseFromParent();
9247 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
9248 MI.getFirst4RegLLTs();
9250 bool IsEltPtr = DstTy.isPointerOrPointerVector();
9259 if (MaskTy.isScalar()) {
9273 if (DstTy.isVector()) {
9276 MaskReg = ShufSplat.getReg(0);
9281 } else if (!DstTy.isVector()) {
9286 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
9299 MI.eraseFromParent();
9305 unsigned Opcode = MI.getOpcode();
9308 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
9309 : TargetOpcode::G_UDIV,
9310 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9312 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
9313 : TargetOpcode::G_UREM,
9314 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
9315 MI.eraseFromParent();
9332 MI.eraseFromParent();
9347 MI.eraseFromParent();
9354 Register DestReg = MI.getOperand(0).getReg();
9360 MI.eraseFromParent();
9376 MI.eraseFromParent();
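// G_VAARG: load the current va_list cursor, round it up when an
// over-alignment is requested, load the argument from it, and store the
// cursor back advanced by the ABI-aligned element size.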
9403 Register ListPtr = MI.getOperand(1).getReg();
9413 const Align A(MI.getOperand(2).getImm());
9420 VAList = AndDst.getReg(0);
9438 Align EltAlignment = DL.getABITypeAlign(Ty);
9443 MI.eraseFromParent();
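// Greedy type selection for the mem-op expansions below: start from the
// widest type the target recommends, shrink while it overruns the bytes
// that remain, and optionally allow one overlapping access rather than a
// tail of ever-smaller operations, up to Limit operations in total.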
9458 unsigned Limit, const MemOp &Op,
9459 unsigned DstAS, unsigned SrcAS,
9462 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
9472 if (Op.isFixedDstAlign())
9480 unsigned NumMemOps = 0;
9484 while (TySize > Size) {
9493 assert(NewTySize > 0 && "Could not find appropriate type");
9500 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
9502 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
9512 if (++NumMemOps > Limit)
9515 MemOps.push_back(Ty);
9527 if (!Ty.isVector() && ValVRegAndVal) {
9528 APInt Scalar = ValVRegAndVal->Value.trunc(8);
9536 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
9559 auto &MF = *MI.getParent()->getParent();
9560 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9561 auto &DL = MF.getDataLayout();
9564 assert(KnownLen != 0 && "Have a zero length memset length!");
9566 bool DstAlignCanChange = false;
9572 DstAlignCanChange = true;
9575 std::vector<LLT> MemOps;
9577 const auto &DstMMO = **MI.memoperands_begin();
9581 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
9589 MF.getFunction().getAttributes(), TLI))
9592 if (DstAlignCanChange) {
9595 Align NewAlign = DL.getABITypeAlign(IRTy);
9596 if (NewAlign > Alignment) {
9597 Alignment = NewAlign;
9607 LLT LargestTy = MemOps[0];
9608 for (unsigned i = 1; i < MemOps.size(); i++)
9610 LargestTy = MemOps[i];
9623 unsigned DstOff = 0;
9624 unsigned Size = KnownLen;
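// One store per chosen type; if the final type is wider than the bytes
// left, DstOff is moved back so the last (overlapping) store still ends
// exactly at Dst + KnownLen.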
9625 for (unsigned I = 0; I < MemOps.size(); I++) {
9628 if (TySize > Size) {
9631 assert(I == MemOps.size() - 1 && I != 0);
9632 DstOff -= TySize - Size;
9643 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
9650 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
9656 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
9659 MIB.buildStore(Value, Ptr, *StoreMMO);
9664 MI.eraseFromParent();
9670 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9672 auto [Dst, Src, Len] = MI.getFirst3Regs();
9674 const auto *MMOIt = MI.memoperands_begin();
9676 bool IsVolatile = MemOp->isVolatile();
9682 "inline memcpy with dynamic size is not yet supported");
9683 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9684 if (KnownLen == 0) {
9685 MI.eraseFromParent();
9689 const auto &DstMMO = **MI.memoperands_begin();
9690 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9691 Align DstAlign = DstMMO.getBaseAlign();
9692 Align SrcAlign = SrcMMO.getBaseAlign();
9694 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9701 Align SrcAlign, bool IsVolatile) {
9702 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
9703 return lowerMemcpy(MI, Dst, Src, KnownLen,
9704 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
9711 Align SrcAlign, bool IsVolatile) {
9712 auto &MF = *MI.getParent()->getParent();
9713 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9714 auto &DL = MF.getDataLayout();
9717 assert(KnownLen != 0 && "Have a zero length memcpy length!");
9719 bool DstAlignCanChange = false;
9721 Align Alignment = std::min(DstAlign, SrcAlign);
9725 DstAlignCanChange = true;
9731 std::vector<LLT> MemOps;
9733 const auto &DstMMO = **MI.memoperands_begin();
9734 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9740 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9743 MF.getFunction().getAttributes(), TLI))
9746 if (DstAlignCanChange) {
9749 Align NewAlign = DL.getABITypeAlign(IRTy);
9754 if (!TRI->hasStackRealignment(MF))
9756 NewAlign = std::min(NewAlign, *StackAlign);
9758 if (NewAlign > Alignment) {
9759 Alignment = NewAlign;
9767 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
9775 unsigned CurrOffset = 0;
9776 unsigned Size = KnownLen;
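// Same trailing-overlap trick as memset: an oversized final type backs
// the offset up so the last load/store pair overlaps its predecessor
// instead of running past the end.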
9777 for (auto CopyTy : MemOps) {
9780 if (CopyTy.getSizeInBytes() > Size)
9781 CurrOffset -= CopyTy.getSizeInBytes() - Size;
9785 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9787 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9792 if (CurrOffset != 0) {
9796 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9798 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
9802 if (CurrOffset != 0) {
9804 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9806 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
9807 CurrOffset += CopyTy.getSizeInBytes();
9808 Size -= CopyTy.getSizeInBytes();
9811 MI.eraseFromParent();
9819 auto &MF = *MI.getParent()->getParent();
9820 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9821 auto &DL = MF.getDataLayout();
9824 assert(KnownLen != 0 && "Have a zero length memmove length!");
9826 bool DstAlignCanChange = false;
9829 Align Alignment = std::min(DstAlign, SrcAlign);
9833 DstAlignCanChange = true;
9836 std::vector<LLT> MemOps;
9838 const auto &DstMMO = **MI.memoperands_begin();
9839 const auto &SrcMMO = **std::next(MI.memoperands_begin());
9848 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
9851 MF.getFunction().getAttributes(), TLI))
9854 if (DstAlignCanChange) {
9857 Align NewAlign = DL.getABITypeAlign(IRTy);
9862 if (!TRI->hasStackRealignment(MF))
9864 NewAlign = std::min(NewAlign, *StackAlign);
9866 if (NewAlign > Alignment) {
9867 Alignment = NewAlign;
9875 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
9881 unsigned CurrOffset = 0;
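// Unlike memcpy, every load is emitted before any store, so the expansion
// stays correct when the source and destination ranges overlap.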
9883 for (auto CopyTy : MemOps) {
9886 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
9890 if (CurrOffset != 0) {
9894 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
9896 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
9897 CurrOffset += CopyTy.getSizeInBytes();
9901 for (unsigned I = 0; I < MemOps.size(); ++I) {
9902 LLT CopyTy = MemOps[I];
9905 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
9908 if (CurrOffset != 0) {
9912 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
9914 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
9917 MI.eraseFromParent();
9923 const unsigned Opc = MI.getOpcode();
9926 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
9927 Opc == TargetOpcode::G_MEMSET) &&
9928 "Expected memcpy like instruction");
9930 auto MMOIt = MI.memoperands_begin();
9935 auto [Dst, Src, Len] = MI.getFirst3Regs();
9937 if (Opc != TargetOpcode::G_MEMSET) {
9938 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
9940 SrcAlign = MemOp->getBaseAlign();
9947 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
9949 if (KnownLen == 0) {
9950 MI.eraseFromParent();
9954 bool IsVolatile = MemOp->isVolatile();
9955 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
9956 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
9963 if (MaxLen && KnownLen > MaxLen)
9966 if (Opc == TargetOpcode::G_MEMCPY) {
9967 auto &MF = *MI.getParent()->getParent();
9968 const auto &TLI = *MF.getSubtarget().getTargetLowering();
9971 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
9974 if (Opc == TargetOpcode::G_MEMMOVE)
9975 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
9976 if (Opc == TargetOpcode::G_MEMSET)
9977 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Type * getReturnType() const
Returns the type of the ret val.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult lowerFAbs(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
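Where this fits in practice: a minimal sketch of driving one legalization step, assuming the helper and observer are already constructed (the wrapper name legalizeOneStep is hypothetical):

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Run one step on MI and report whether it ended up legal.
static bool legalizeOneStep(LegalizerHelper &Helper, MachineInstr &MI,
                            LostDebugLocObserver &LocObserver) {
  switch (Helper.legalizeInstrStep(MI, LocObserver)) {
  case LegalizerHelper::AlreadyLegal:
  case LegalizerHelper::Legalized: // MI was rewritten or replaced
    return true;
  case LegalizerHelper::UnableToLegalize:
    return false;
  }
  llvm_unreachable("covered switch");
}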
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
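isLegal, isLegalOrCustom, and getAction all take a LegalityQuery, which bundles the opcode with one LLT per type index. A sketch; the opcode/type choice is arbitrary and header paths vary slightly across LLVM releases:

#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

using namespace llvm;

// True if the target accepts a 64-bit scalar G_ADD natively or via its
// legalizeCustom hook. G_ADD has a single type index, so the query
// carries exactly one type.
static bool targetHandlesAdd64(const LegalizerInfo &LI) {
  return LI.isLegalOrCustom({TargetOpcode::G_ADD, {LLT::scalar(64)}});
}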
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
StringRef getString() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
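These MachineFunction pieces usually combine with the builder when materializing a stack temporary. A minimal sketch: spillToStackTemp is hypothetical, the 64-bit address-space-0 pointer is an assumption, and a real implementation would take alignment from the target rather than from the value size:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

// Store Val to a fresh stack slot and return the slot's address.
static Register spillToStackTemp(MachineIRBuilder &B, Register Val) {
  MachineFunction &MF = B.getMF();
  LLT ValTy = B.getMRI()->getType(Val);
  uint64_t Size = ValTy.getSizeInBytes(); // fixed-size type assumed
  Align Alignment = assumeAligned(Size);

  int FI = MF.getFrameInfo().CreateStackObject(Size, Alignment,
                                               /*isSpillSlot=*/false);
  const LLT PtrTy = LLT::pointer(0, 64); // assumed pointer shape
  Register Addr = B.buildFrameIndex(PtrTy, FI).getReg(0);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      ValTy, Alignment);
  B.buildStore(Val, Addr, *MMO);
  return Addr;
}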
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
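Before the individual methods, a taste of the builder in action: passing an LLT as the DstOp makes the builder create a fresh generic virtual register for the result. The helper name emitMulAdd and the s32 type are illustrative assumptions:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Emit (A + B) * A at the current insertion point.
static Register emitMulAdd(MachineIRBuilder &MIB, Register A, Register B) {
  const LLT S32 = LLT::scalar(32); // assumed operand type
  auto Sum = MIB.buildAdd(S32, A, B); // MachineInstrBuilder doubles as SrcOp
  return MIB.buildMul(S32, Sum, A).getReg(0);
}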
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_AND Op, LowBitsSet(ImmOp). Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
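In other words, buildZExtInReg lowers to a single G_AND against a low-bit mask. A hand-written equivalent for keeping the low 8 bits of a 64-bit value (zextInReg8 is a hypothetical name):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Same effect as B.buildZExtInReg(S64, Val, 8).
static Register zextInReg8(MachineIRBuilder &B, Register Val) {
  const LLT S64 = LLT::scalar(64);
  auto Mask = B.buildConstant(S64, 0xFF); // LowBitsSet(8)
  return B.buildAnd(S64, Val, Mask).getReg(0);
}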
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
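Compare and select usually travel together; note that in GMIR the compare result has its own (typically s1) type. A sketch spelling out a signed max by hand (emitSMax is illustrative; buildSMax elsewhere in this list does it in one call):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

static Register emitSMax(MachineIRBuilder &MIB, Register A, Register B) {
  const LLT S32 = LLT::scalar(32), S1 = LLT::scalar(1);
  auto Cmp = MIB.buildICmp(CmpInst::ICMP_SGT, S1, A, B);
  return MIB.buildSelect(S32, Cmp, A, B).getReg(0);
}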
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOUI Src0.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
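A splat sketch using buildSplatBuildVector; the fixed 4 x s32 type is an assumption, and this method's name has shifted across LLVM versions:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Replicate Scalar into every lane of a 4 x s32 vector.
static Register splat4x32(MachineIRBuilder &B, Register Scalar) {
  const LLT V4S32 = LLT::fixed_vector(4, 32);
  return B.buildSplatBuildVector(V4S32, Scalar).getReg(0);
}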
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert an instruction with the given Opcode and no operands; operands can be appended via the returned builder.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a, b, ...
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
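Shuffle masks are plain integer arrays, with -1 marking an undef lane. A sketch reversing a 4-lane vector (reverse4 and the element shape are hypothetical):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

static Register reverse4(MachineIRBuilder &B, Register V) {
  const LLT V4S32 = LLT::fixed_vector(4, 32); // assumed element shape
  return B.buildShuffleVector(V4S32, V, V, {3, 2, 1, 0}).getReg(0);
}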
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
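The classic shl/ashr pair below is a sketch of what G_SEXT_INREG means, not how targets must lower it (sextInReg8 is a hypothetical name):

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Manual equivalent of B.buildSExtInReg(S32, Val, 8).
static Register sextInReg8(MachineIRBuilder &B, Register Val) {
  const LLT S32 = LLT::scalar(32);
  auto ShAmt = B.buildConstant(S32, 24); // 32 - 8 bits
  auto Shl = B.buildShl(S32, Val, ShAmt);
  return B.buildAShr(S32, Shl, ShAmt).getReg(0);
}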
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
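A small MachineRegisterInfo sketch tying getType and createGenericVirtualRegister together; makeTempLike is hypothetical, and the name string only aids MIR readability:

#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Make a fresh generic vreg with the same LLT as Src.
static Register makeTempLike(Register Src, MachineRegisterInfo &MRI) {
  LLT Ty = MRI.getType(Src); // LLT{} if Src is not generic
  return MRI.createGenericVirtualRegister(Ty, "tmp");
}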
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
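m_ICst composes with mi_match for quick constant checks; a sketch (matchConstantReg is a hypothetical wrapper):

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;
using namespace llvm::MIPatternMatch;

// True if R is defined by a G_CONSTANT; Out receives the value.
static bool matchConstantReg(Register R, const MachineRegisterInfo &MRI,
                             APInt &Out) {
  return mi_match(R, MRI, m_ICst(Out));
}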
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLT getLLTForMVT(MVT Ty)
Get a rough equivalent of an LLT for a given MVT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
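A few worked values for the MathExtras helpers listed here and nearby (minIntN/maxIntN included), compilable against llvm/Support/MathExtras.h:

#include "llvm/Support/MathExtras.h"
#include <cassert>

using namespace llvm;

static void mathExtrasExamples() {
  assert(isPowerOf2_32(64) && !isPowerOf2_32(48));
  assert(Log2_32(48) == 5);       // floor: 2^5 = 32 <= 48 < 64
  assert(PowerOf2Ceil(48) == 64); // round up to a power of two
  assert(maxIntN(8) == 127 && minIntN(8) == -128);
}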
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
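A sketch of extractParts splitting an s64 into two s32 halves (splitS64 is hypothetical); the parts come back low-bits-first, matching G_UNMERGE_VALUES ordering:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"

using namespace llvm;

static void splitS64(Register Src, MachineIRBuilder &B,
                     MachineRegisterInfo &MRI) {
  SmallVector<Register, 2> Halves;
  // Emits a G_UNMERGE_VALUES; Halves[0] holds the low 32 bits.
  extractParts(Src, LLT::scalar(32), /*NumParts=*/2, Halves, B, MRI);
}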
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
OutputIt copy(R &&Range, OutputIt Out)
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
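A sketch using the look-through helper to test for a literal zero (isKnownZeroConstant is a hypothetical name):

#include "llvm/CodeGen/GlobalISel/Utils.h"

using namespace llvm;

static bool isKnownZeroConstant(Register R,
                                const MachineRegisterInfo &MRI) {
  if (auto Val = getIConstantVRegValWithLookThrough(R, MRI))
    return Val->Value.isZero();
  return false;
}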
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
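Worked values for the alignment helpers above, all from llvm/Support/Alignment.h:

#include "llvm/Support/Alignment.h"
#include <cassert>

using namespace llvm;

static void alignmentExamples() {
  assert(Log2(Align(16)) == 4);
  assert(assumeAligned(0) == Align(1)); // zero promotes to one
  assert(commonAlignment(Align(16), /*Offset=*/8) == Align(8));
}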
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
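Scalar-only worked values for getGCDType and its getLCMType counterpart listed earlier; vector combinations follow more involved rules, so these are the safe cases:

#include "llvm/CodeGen/GlobalISel/Utils.h"
#include <cassert>

using namespace llvm;

static void gcdLcmTypeExamples() {
  const LLT S24 = LLT::scalar(24), S32 = LLT::scalar(32);
  assert(getGCDType(S24, S32) == LLT::scalar(8));  // gcd(24, 32) = 8
  assert(getLCMType(S24, S32) == LLT::scalar(96)); // lcm(24, 32) = 96
}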
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)