43#define DEBUG_TYPE "legalizer"
56static std::pair<int, int>
62 unsigned NumParts =
Size / NarrowSize;
63 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
66 if (LeftoverSize == 0)
71 if (LeftoverSize % EltSize != 0)
80 return std::make_pair(NumParts, NumLeftover);
88 switch (Ty.getSizeInBits()) {
129 auto Step = LI.getAction(
MI, MRI);
130 switch (Step.Action) {
145 return bitcast(
MI, Step.TypeIdx, Step.NewType);
148 return lower(
MI, Step.TypeIdx, Step.NewType);
157 return LI.legalizeCustom(*
this,
MI, LocObserver) ?
Legalized
165void LegalizerHelper::insertParts(
Register DstReg,
187 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
189 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
203 LLT Ty = MRI.getType(
Reg);
211void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
214 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
215 appendVectorElts(AllElts, PartRegs[i]);
218 if (!MRI.getType(Leftover).isVector())
221 appendVectorElts(AllElts, Leftover);
223 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
229 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
231 const int StartIdx = Regs.
size();
232 const int NumResults =
MI.getNumOperands() - 1;
234 for (
int I = 0;
I != NumResults; ++
I)
235 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
247 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
254 LLT SrcTy = MRI.getType(SrcReg);
256 extractGCDType(Parts, GCDTy, SrcReg);
260LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
262 unsigned PadStrategy) {
267 int NumOrigSrc = VRegs.
size();
273 if (NumOrigSrc < NumParts * NumSubParts) {
274 if (PadStrategy == TargetOpcode::G_ZEXT)
275 PadReg =
MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
276 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 PadReg =
MIRBuilder.buildUndef(GCDTy).getReg(0);
279 assert(PadStrategy == TargetOpcode::G_SEXT);
284 PadReg =
MIRBuilder.buildAShr(GCDTy, VRegs.
back(), ShiftAmt).getReg(0);
300 for (
int I = 0;
I != NumParts; ++
I) {
301 bool AllMergePartsArePadding =
true;
304 for (
int J = 0; J != NumSubParts; ++J) {
305 int Idx =
I * NumSubParts + J;
306 if (Idx >= NumOrigSrc) {
307 SubMerge[J] = PadReg;
311 SubMerge[J] = VRegs[Idx];
314 AllMergePartsArePadding =
false;
320 if (AllMergePartsArePadding && !AllPadReg) {
321 if (PadStrategy == TargetOpcode::G_ANYEXT)
322 AllPadReg =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
323 else if (PadStrategy == TargetOpcode::G_ZEXT)
324 AllPadReg =
MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
333 Remerge[
I] = AllPadReg;
337 if (NumSubParts == 1)
338 Remerge[
I] = SubMerge[0];
340 Remerge[
I] =
MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
343 if (AllMergePartsArePadding && !AllPadReg)
344 AllPadReg = Remerge[
I];
347 VRegs = std::move(Remerge);
351void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
353 LLT DstTy = MRI.getType(DstReg);
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
363 auto Remerge =
MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
372 UnmergeDefs[0] = DstReg;
373 for (
unsigned I = 1;
I != NumDefs; ++
I)
374 UnmergeDefs[
I] = MRI.createGenericVirtualRegister(DstTy);
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
385#define RTLIBCASE_INT(LibcallPrefix) \
389 return RTLIB::LibcallPrefix##32; \
391 return RTLIB::LibcallPrefix##64; \
393 return RTLIB::LibcallPrefix##128; \
395 llvm_unreachable("unexpected size"); \
399#define RTLIBCASE(LibcallPrefix) \
403 return RTLIB::LibcallPrefix##32; \
405 return RTLIB::LibcallPrefix##64; \
407 return RTLIB::LibcallPrefix##80; \
409 return RTLIB::LibcallPrefix##128; \
411 llvm_unreachable("unexpected size"); \
416 case TargetOpcode::G_LROUND:
418 case TargetOpcode::G_LLROUND:
420 case TargetOpcode::G_MUL:
422 case TargetOpcode::G_SDIV:
424 case TargetOpcode::G_UDIV:
426 case TargetOpcode::G_SREM:
428 case TargetOpcode::G_UREM:
430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
432 case TargetOpcode::G_FADD:
434 case TargetOpcode::G_FSUB:
436 case TargetOpcode::G_FMUL:
438 case TargetOpcode::G_FDIV:
440 case TargetOpcode::G_FEXP:
442 case TargetOpcode::G_FEXP2:
444 case TargetOpcode::G_FEXP10:
446 case TargetOpcode::G_FREM:
448 case TargetOpcode::G_FPOW:
450 case TargetOpcode::G_FPOWI:
452 case TargetOpcode::G_FMA:
454 case TargetOpcode::G_FSIN:
456 case TargetOpcode::G_FCOS:
458 case TargetOpcode::G_FTAN:
460 case TargetOpcode::G_FASIN:
462 case TargetOpcode::G_FACOS:
464 case TargetOpcode::G_FATAN:
466 case TargetOpcode::G_FATAN2:
468 case TargetOpcode::G_FSINH:
470 case TargetOpcode::G_FCOSH:
472 case TargetOpcode::G_FTANH:
474 case TargetOpcode::G_FSINCOS:
476 case TargetOpcode::G_FMODF:
478 case TargetOpcode::G_FLOG10:
480 case TargetOpcode::G_FLOG:
482 case TargetOpcode::G_FLOG2:
484 case TargetOpcode::G_FLDEXP:
486 case TargetOpcode::G_FCEIL:
488 case TargetOpcode::G_FFLOOR:
490 case TargetOpcode::G_FMINNUM:
492 case TargetOpcode::G_FMAXNUM:
494 case TargetOpcode::G_FMINIMUMNUM:
496 case TargetOpcode::G_FMAXIMUMNUM:
498 case TargetOpcode::G_FSQRT:
500 case TargetOpcode::G_FRINT:
502 case TargetOpcode::G_FNEARBYINT:
504 case TargetOpcode::G_INTRINSIC_TRUNC:
506 case TargetOpcode::G_INTRINSIC_ROUND:
508 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
510 case TargetOpcode::G_INTRINSIC_LRINT:
512 case TargetOpcode::G_INTRINSIC_LLRINT:
532 AttributeList CallerAttrs =
F.getAttributes();
533 if (AttrBuilder(
F.getContext(), CallerAttrs.getRetAttrs())
534 .removeAttribute(Attribute::NoAlias)
535 .removeAttribute(Attribute::NonNull)
540 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
541 CallerAttrs.hasRetAttr(Attribute::SExt))
552 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
559 if (!VReg.
isVirtual() || VReg !=
Next->getOperand(1).getReg())
567 if (Ret ==
MBB.instr_end() || !Ret->isReturn())
570 if (Ret->getNumImplicitOperands() != 1)
573 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
590 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
595 Info.OrigRet = Result;
598 (Result.Ty->isVoidTy() ||
599 Result.Ty ==
MIRBuilder.getMF().getFunction().getReturnType()) &&
607 if (
MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
618 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
622 Next->eraseFromParent();
623 }
while (
MI->getNextNode());
638 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(
Libcall);
639 if (LibcallImpl == RTLIB::Unsupported)
643 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
657 Args.push_back({MO.getReg(), OpType, 0});
671 LLT DstTy =
MRI.getType(DstSin);
676 unsigned AddrSpace =
DL.getAllocaAddrSpace();
694 if (LibcallResult != LegalizeResult::Legalized)
702 MIRBuilder.
buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
703 MIRBuilder.
buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
704 MI.eraseFromParent();
719 LLT DstTy = MRI.getType(DstFrac);
724 unsigned AddrSpace =
DL.getAllocaAddrSpace();
725 MachinePointerInfo PtrInfo;
734 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
737 if (LibcallResult != LegalizeResult::Legalized)
743 MIRBuilder.
buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
744 MI.eraseFromParent();
755 case TargetOpcode::G_FPEXT:
757 case TargetOpcode::G_FPTRUNC:
759 case TargetOpcode::G_FPTOSI:
761 case TargetOpcode::G_FPTOUI:
763 case TargetOpcode::G_SITOFP:
765 case TargetOpcode::G_UITOFP:
775 if (FromType->isIntegerTy()) {
776 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
777 Arg.
Flags[0].setSExt();
779 Arg.
Flags[0].setZExt();
790 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
794 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
798 LLT OpLLT = MRI.getType(Reg);
799 Type *OpTy =
nullptr;
804 Args.push_back({Reg, OpTy, 0});
807 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
808 RTLIB::Libcall RTLibcall;
809 unsigned Opc =
MI.getOpcode();
811 case TargetOpcode::G_BZERO:
812 RTLibcall = RTLIB::BZERO;
814 case TargetOpcode::G_MEMCPY:
815 RTLibcall = RTLIB::MEMCPY;
816 Args[0].Flags[0].setReturned();
818 case TargetOpcode::G_MEMMOVE:
819 RTLibcall = RTLIB::MEMMOVE;
820 Args[0].Flags[0].setReturned();
822 case TargetOpcode::G_MEMSET:
823 RTLibcall = RTLIB::MEMSET;
824 Args[0].Flags[0].setReturned();
833 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
836 if (RTLibcallImpl == RTLIB::Unsupported) {
843 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
850 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
857 if (Info.LoweredTailCall) {
858 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
868 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
869 "Expected instr following MI to be return or debug inst?");
872 Next->eraseFromParent();
873 }
while (
MI.getNextNode());
883 unsigned Opc =
MI.getOpcode();
885 auto &MMO = AtomicMI.getMMO();
886 auto Ordering = MMO.getMergedOrdering();
887 LLT MemType = MMO.getMemoryType();
890 return RTLIB::UNKNOWN_LIBCALL;
892#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
894 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
899 return getOutlineAtomicHelper(LC, Ordering, MemSize);
901 case TargetOpcode::G_ATOMICRMW_XCHG: {
902 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
903 return getOutlineAtomicHelper(LC, Ordering, MemSize);
905 case TargetOpcode::G_ATOMICRMW_ADD:
906 case TargetOpcode::G_ATOMICRMW_SUB: {
907 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
908 return getOutlineAtomicHelper(LC, Ordering, MemSize);
910 case TargetOpcode::G_ATOMICRMW_AND: {
911 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
912 return getOutlineAtomicHelper(LC, Ordering, MemSize);
914 case TargetOpcode::G_ATOMICRMW_OR: {
915 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
916 return getOutlineAtomicHelper(LC, Ordering, MemSize);
918 case TargetOpcode::G_ATOMICRMW_XOR: {
919 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
920 return getOutlineAtomicHelper(LC, Ordering, MemSize);
923 return RTLIB::UNKNOWN_LIBCALL;
936 unsigned Opc =
MI.getOpcode();
938 case TargetOpcode::G_ATOMIC_CMPXCHG:
939 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
942 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
943 MI.getFirst4RegLLTs();
946 if (
Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
947 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
948 NewLLT) =
MI.getFirst5RegLLTs();
958 case TargetOpcode::G_ATOMICRMW_XCHG:
959 case TargetOpcode::G_ATOMICRMW_ADD:
960 case TargetOpcode::G_ATOMICRMW_SUB:
961 case TargetOpcode::G_ATOMICRMW_AND:
962 case TargetOpcode::G_ATOMICRMW_OR:
963 case TargetOpcode::G_ATOMICRMW_XOR: {
964 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
967 if (
Opc == TargetOpcode::G_ATOMICRMW_AND)
971 else if (
Opc == TargetOpcode::G_ATOMICRMW_SUB)
986 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
988 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
991 if (RTLibcallImpl == RTLIB::Unsupported) {
998 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1012static RTLIB::Libcall
1014 RTLIB::Libcall RTLibcall;
1015 switch (
MI.getOpcode()) {
1016 case TargetOpcode::G_GET_FPENV:
1017 RTLibcall = RTLIB::FEGETENV;
1019 case TargetOpcode::G_SET_FPENV:
1020 case TargetOpcode::G_RESET_FPENV:
1021 RTLibcall = RTLIB::FESETENV;
1023 case TargetOpcode::G_GET_FPMODE:
1024 RTLibcall = RTLIB::FEGETMODE;
1026 case TargetOpcode::G_SET_FPMODE:
1027 case TargetOpcode::G_RESET_FPMODE:
1028 RTLibcall = RTLIB::FESETMODE;
1060 LLT StateTy = MRI.getType(Dst);
1063 MachinePointerInfo TempPtrInfo;
1067 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1072 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1080 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1098 LLT StateTy = MRI.getType(Src);
1101 MachinePointerInfo TempPtrInfo;
1110 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver,
nullptr);
1122static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1124#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1128 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1130 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1132 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1134 llvm_unreachable("unexpected size"); \
1165 LLT OpLLT = MRI.getType(
Cmp->getLHSReg());
1168 OpLLT != MRI.getType(
Cmp->getRHSReg()))
1175 LLT DstTy = MRI.getType(DstReg);
1176 const auto Cond =
Cmp->getCond();
1181 const auto BuildLibcall = [&](
const RTLIB::Libcall
Libcall,
1186 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1190 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1197 .buildICmp(ICmpPred, Res, Temp,
MIRBuilder.buildConstant(TempLLT, 0))
1203 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1205 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1218 const auto [OeqLibcall, OeqPred] =
1220 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1222 const auto [UnoLibcall, UnoPred] =
1224 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1239 const auto [OeqLibcall, OeqPred] =
1244 const auto [UnoLibcall, UnoPred] =
1249 if (NotOeq && NotUno)
1268 const auto [InversedLibcall, InversedPred] =
1270 if (!BuildLibcall(InversedLibcall,
1295 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1297 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &
MI);
1311 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
1313 switch (
MI.getOpcode()) {
1316 case TargetOpcode::G_MUL:
1317 case TargetOpcode::G_SDIV:
1318 case TargetOpcode::G_UDIV:
1319 case TargetOpcode::G_SREM:
1320 case TargetOpcode::G_UREM:
1321 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1322 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1330 case TargetOpcode::G_FADD:
1331 case TargetOpcode::G_FSUB:
1332 case TargetOpcode::G_FMUL:
1333 case TargetOpcode::G_FDIV:
1334 case TargetOpcode::G_FMA:
1335 case TargetOpcode::G_FPOW:
1336 case TargetOpcode::G_FREM:
1337 case TargetOpcode::G_FCOS:
1338 case TargetOpcode::G_FSIN:
1339 case TargetOpcode::G_FTAN:
1340 case TargetOpcode::G_FACOS:
1341 case TargetOpcode::G_FASIN:
1342 case TargetOpcode::G_FATAN:
1343 case TargetOpcode::G_FATAN2:
1344 case TargetOpcode::G_FCOSH:
1345 case TargetOpcode::G_FSINH:
1346 case TargetOpcode::G_FTANH:
1347 case TargetOpcode::G_FLOG10:
1348 case TargetOpcode::G_FLOG:
1349 case TargetOpcode::G_FLOG2:
1350 case TargetOpcode::G_FEXP:
1351 case TargetOpcode::G_FEXP2:
1352 case TargetOpcode::G_FEXP10:
1353 case TargetOpcode::G_FCEIL:
1354 case TargetOpcode::G_FFLOOR:
1355 case TargetOpcode::G_FMINNUM:
1356 case TargetOpcode::G_FMAXNUM:
1357 case TargetOpcode::G_FMINIMUMNUM:
1358 case TargetOpcode::G_FMAXIMUMNUM:
1359 case TargetOpcode::G_FSQRT:
1360 case TargetOpcode::G_FRINT:
1361 case TargetOpcode::G_FNEARBYINT:
1362 case TargetOpcode::G_INTRINSIC_TRUNC:
1363 case TargetOpcode::G_INTRINSIC_ROUND:
1364 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1365 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1369 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1377 case TargetOpcode::G_FSINCOS: {
1378 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1382 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1387 case TargetOpcode::G_FMODF: {
1388 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1392 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1397 case TargetOpcode::G_LROUND:
1398 case TargetOpcode::G_LLROUND:
1399 case TargetOpcode::G_INTRINSIC_LRINT:
1400 case TargetOpcode::G_INTRINSIC_LLRINT: {
1401 LLT LLTy = MRI.getType(
MI.getOperand(1).getReg());
1405 Ctx, MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits());
1407 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1413 {{
MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &
MI);
1416 MI.eraseFromParent();
1419 case TargetOpcode::G_FPOWI:
1420 case TargetOpcode::G_FLDEXP: {
1421 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1425 Ctx, MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
1427 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1432 {
MI.getOperand(1).getReg(), HLTy, 0},
1433 {
MI.getOperand(2).getReg(), ITy, 1}};
1434 Args[1].Flags[0].setSExt();
1436 Libcall, {
MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &
MI);
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1445 if (!FromTy || !ToTy)
1452 case TargetOpcode::G_FCMP: {
1456 MI.eraseFromParent();
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1464 unsigned ToSize = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1468 FromTy, LocObserver);
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1487 case TargetOpcode::G_ATOMICRMW_XCHG:
1488 case TargetOpcode::G_ATOMICRMW_ADD:
1489 case TargetOpcode::G_ATOMICRMW_SUB:
1490 case TargetOpcode::G_ATOMICRMW_AND:
1491 case TargetOpcode::G_ATOMICRMW_OR:
1492 case TargetOpcode::G_ATOMICRMW_XOR:
1493 case TargetOpcode::G_ATOMIC_CMPXCHG:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1500 case TargetOpcode::G_BZERO:
1501 case TargetOpcode::G_MEMCPY:
1502 case TargetOpcode::G_MEMMOVE:
1503 case TargetOpcode::G_MEMSET: {
1508 MI.eraseFromParent();
1511 case TargetOpcode::G_GET_FPENV:
1512 case TargetOpcode::G_GET_FPMODE: {
1518 case TargetOpcode::G_SET_FPENV:
1519 case TargetOpcode::G_SET_FPMODE: {
1525 case TargetOpcode::G_RESET_FPENV:
1526 case TargetOpcode::G_RESET_FPMODE: {
1534 MI.eraseFromParent();
1541 uint64_t SizeOp0 = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1544 switch (
MI.getOpcode()) {
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1549 LLT DstTy = MRI.getType(DstReg);
1557 if (SizeOp0 % NarrowSize != 0) {
1562 MI.eraseFromParent();
1566 int NumParts = SizeOp0 / NarrowSize;
1569 for (
int i = 0; i < NumParts; ++i)
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1581 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1587 for (
int I = 0;
I != NumParts; ++
I) {
1588 unsigned Offset =
I * NarrowSize;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1597 if (LeftoverBits != 0) {
1601 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1605 insertParts(
MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1608 MI.eraseFromParent();
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1615 case TargetOpcode::G_TRUNC: {
1619 uint64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1621 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1625 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
1626 MIRBuilder.buildCopy(
MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1635 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1640 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1).getReg());
1642 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1644 MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1648 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1666 case TargetOpcode::G_EXTRACT:
1668 case TargetOpcode::G_INSERT:
1670 case TargetOpcode::G_LOAD: {
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1681 LoadMI.eraseFromParent();
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD: {
1690 Register DstReg = LoadMI.getDstReg();
1691 Register PtrReg = LoadMI.getPointerReg();
1693 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1694 auto &MMO = LoadMI.getMMO();
1697 if (MemSize == NarrowSize) {
1699 }
else if (MemSize < NarrowSize) {
1700 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1701 }
else if (MemSize > NarrowSize) {
1711 LoadMI.eraseFromParent();
1714 case TargetOpcode::G_STORE: {
1717 Register SrcReg = StoreMI.getValueReg();
1718 LLT SrcTy = MRI.getType(SrcReg);
1719 if (SrcTy.isVector())
1722 int NumParts = SizeOp0 / NarrowSize;
1724 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1725 if (SrcTy.isVector() && LeftoverBits != 0)
1728 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1729 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1731 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1732 StoreMI.eraseFromParent();
1738 case TargetOpcode::G_SELECT:
1740 case TargetOpcode::G_AND:
1741 case TargetOpcode::G_OR:
1742 case TargetOpcode::G_XOR: {
1754 case TargetOpcode::G_SHL:
1755 case TargetOpcode::G_LSHR:
1756 case TargetOpcode::G_ASHR:
1758 case TargetOpcode::G_CTLZ:
1759 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1760 case TargetOpcode::G_CTTZ:
1761 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1762 case TargetOpcode::G_CTLS:
1763 case TargetOpcode::G_CTPOP:
1765 switch (
MI.getOpcode()) {
1766 case TargetOpcode::G_CTLZ:
1767 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1769 case TargetOpcode::G_CTTZ:
1770 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1772 case TargetOpcode::G_CTPOP:
1774 case TargetOpcode::G_CTLS:
1784 case TargetOpcode::G_INTTOPTR:
1792 case TargetOpcode::G_PTRTOINT:
1800 case TargetOpcode::G_PHI: {
1803 if (SizeOp0 % NarrowSize != 0)
1806 unsigned NumParts = SizeOp0 / NarrowSize;
1810 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1818 for (
unsigned i = 0; i < NumParts; ++i) {
1819 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1821 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1822 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1823 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1826 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
1828 MI.eraseFromParent();
1831 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1832 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1836 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1842 case TargetOpcode::G_ICMP: {
1844 LLT SrcTy = MRI.getType(LHS);
1850 if (!
extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1856 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1857 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1863 LLT ResTy = MRI.getType(Dst);
1868 auto Zero =
MIRBuilder.buildConstant(NarrowTy, 0);
1870 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1871 auto LHS = std::get<0>(LHSAndRHS);
1872 auto RHS = std::get<1>(LHSAndRHS);
1873 auto Xor =
MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1880 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1881 auto LHS = std::get<0>(LHSAndRHS);
1882 auto RHS = std::get<1>(LHSAndRHS);
1883 auto Xor =
MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1884 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1885 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1886 TargetOpcode::G_ZEXT);
1893 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1894 auto Or =
MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1895 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1900 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1904 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1909 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1913 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[
I],
1916 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[
I],
1919 LHSPartRegs[
I], RHSPartRegs[
I]);
1920 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1926 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[
I],
1940 RHSLeftoverRegs[
I]);
1942 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[
I],
1943 RHSLeftoverRegs[
I]);
1946 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1947 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1953 MI.eraseFromParent();
1956 case TargetOpcode::G_FCMP:
1965 case TargetOpcode::G_SEXT_INREG: {
1969 int64_t SizeInBits =
MI.getOperand(2).getImm();
1978 auto TruncMIB =
MIRBuilder.buildTrunc(NarrowTy, MO1);
1979 MO1.
setReg(TruncMIB.getReg(0));
1982 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1994 if (SizeOp0 % NarrowSize != 0)
1996 int NumParts = SizeOp0 / NarrowSize;
2004 for (
int i = 0; i < NumParts; ++i) {
2005 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2020 for (
int i = 0; i < NumParts; ++i) {
2023 PartialExtensionReg = DstRegs.
back();
2025 assert(PartialExtensionReg &&
2026 "Expected to visit partial extension before full");
2027 if (FullExtensionReg) {
2032 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2034 FullExtensionReg = DstRegs.
back();
2039 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2042 PartialExtensionReg = DstRegs.
back();
2048 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2049 MI.eraseFromParent();
2052 case TargetOpcode::G_BSWAP:
2053 case TargetOpcode::G_BITREVERSE: {
2054 if (SizeOp0 % NarrowSize != 0)
2059 unsigned NumParts = SizeOp0 / NarrowSize;
2060 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2063 for (
unsigned i = 0; i < NumParts; ++i) {
2064 auto DstPart =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
2065 {SrcRegs[NumParts - 1 - i]});
2069 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
2072 MI.eraseFromParent();
2075 case TargetOpcode::G_PTR_ADD:
2076 case TargetOpcode::G_PTRMASK: {
2084 case TargetOpcode::G_FPTOUI:
2085 case TargetOpcode::G_FPTOSI:
2086 case TargetOpcode::G_FPTOUI_SAT:
2087 case TargetOpcode::G_FPTOSI_SAT:
2089 case TargetOpcode::G_FPEXT:
2096 case TargetOpcode::G_FLDEXP:
2097 case TargetOpcode::G_STRICT_FLDEXP:
2099 case TargetOpcode::G_VSCALE: {
2101 LLT Ty = MRI.getType(Dst);
2105 auto VScaleBase =
MIRBuilder.buildVScale(NarrowTy, One);
2106 auto ZExt =
MIRBuilder.buildZExt(Ty, VScaleBase);
2107 auto C =
MIRBuilder.buildConstant(Ty, *
MI.getOperand(1).getCImm());
2110 MI.eraseFromParent();
2117 LLT Ty = MRI.getType(Val);
2123 if (Ty.isPointer()) {
2124 if (
DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2126 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2132 if (Ty.isPointerVector())
2133 NewVal =
MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2134 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2138 unsigned OpIdx,
unsigned ExtOpcode) {
2140 auto ExtB =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2141 MO.
setReg(ExtB.getReg(0));
2147 auto ExtB =
MIRBuilder.buildTrunc(NarrowTy, MO);
2148 MO.
setReg(ExtB.getReg(0));
2152 unsigned OpIdx,
unsigned TruncOpcode) {
2154 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2156 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2161 unsigned OpIdx,
unsigned ExtOpcode) {
2163 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2165 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2174 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2176 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2182 MO.
setReg(
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2192 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2199LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2204 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
2205 if (DstTy.isVector())
2208 LLT SrcTy =
MRI.getType(Src1Reg);
2209 const int DstSize = DstTy.getSizeInBits();
2210 const int SrcSize = SrcTy.getSizeInBits();
2212 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2214 unsigned NumOps =
MI.getNumOperands();
2215 unsigned NumSrc =
MI.getNumOperands() - 1;
2216 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2218 if (WideSize >= DstSize) {
2222 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
2223 const unsigned Offset = (
I - 1) * PartSize;
2231 MRI.createGenericVirtualRegister(WideTy);
2236 ResultReg = NextResult;
2239 if (WideSize > DstSize)
2241 else if (DstTy.isPointer())
2244 MI.eraseFromParent();
2269 const int GCD = std::gcd(SrcSize, WideSize);
2279 if (GCD == SrcSize) {
2282 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2283 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2289 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2291 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
2295 const int PartsPerGCD = WideSize / GCD;
2299 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2301 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2308 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2310 auto FinalMerge =
MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2311 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2314 MI.eraseFromParent();
2319LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2324 int NumDst =
MI.getNumOperands() - 1;
2325 Register SrcReg =
MI.getOperand(NumDst).getReg();
2326 LLT SrcTy = MRI.getType(SrcReg);
2330 Register Dst0Reg =
MI.getOperand(0).getReg();
2331 LLT DstTy = MRI.getType(Dst0Reg);
2340 dbgs() <<
"Not casting non-integral address space integer\n");
2345 SrcReg =
MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2353 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2361 for (
int I = 1;
I != NumDst; ++
I) {
2362 auto ShiftAmt =
MIRBuilder.buildConstant(SrcTy, DstSize *
I);
2363 auto Shr =
MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2367 MI.eraseFromParent();
2378 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2382 WideSrc =
MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2385 auto Unmerge =
MIRBuilder.buildUnmerge(WideTy, WideSrc);
2403 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2408 if (PartsPerRemerge == 1) {
2411 for (
int I = 0;
I != NumUnmerge; ++
I) {
2412 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2414 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2415 int Idx =
I * PartsPerUnmerge + J;
2417 MIB.addDef(
MI.getOperand(Idx).getReg());
2420 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2424 MIB.addUse(Unmerge.getReg(
I));
2427 SmallVector<Register, 16> Parts;
2428 for (
int J = 0; J != NumUnmerge; ++J)
2429 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2432 for (
int I = 0;
I != NumDst; ++
I) {
2433 for (
int J = 0; J < PartsPerRemerge; ++J) {
2434 const int Idx =
I * PartsPerRemerge + J;
2438 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(
I).getReg(), RemergeParts);
2439 RemergeParts.
clear();
2443 MI.eraseFromParent();
2448LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2450 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2451 unsigned Offset =
MI.getOperand(2).getImm();
2454 if (SrcTy.
isVector() || DstTy.isVector())
2466 Src =
MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2470 if (DstTy.isPointer())
2477 MI.eraseFromParent();
2482 LLT ShiftTy = SrcTy;
2491 MI.eraseFromParent();
2522LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2524 if (TypeIdx != 0 || WideTy.
isVector())
2534LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2538 std::optional<Register> CarryIn;
2539 switch (
MI.getOpcode()) {
2542 case TargetOpcode::G_SADDO:
2543 Opcode = TargetOpcode::G_ADD;
2544 ExtOpcode = TargetOpcode::G_SEXT;
2546 case TargetOpcode::G_SSUBO:
2547 Opcode = TargetOpcode::G_SUB;
2548 ExtOpcode = TargetOpcode::G_SEXT;
2550 case TargetOpcode::G_UADDO:
2551 Opcode = TargetOpcode::G_ADD;
2552 ExtOpcode = TargetOpcode::G_ZEXT;
2554 case TargetOpcode::G_USUBO:
2555 Opcode = TargetOpcode::G_SUB;
2556 ExtOpcode = TargetOpcode::G_ZEXT;
2558 case TargetOpcode::G_SADDE:
2559 Opcode = TargetOpcode::G_UADDE;
2560 ExtOpcode = TargetOpcode::G_SEXT;
2561 CarryIn =
MI.getOperand(4).getReg();
2563 case TargetOpcode::G_SSUBE:
2564 Opcode = TargetOpcode::G_USUBE;
2565 ExtOpcode = TargetOpcode::G_SEXT;
2566 CarryIn =
MI.getOperand(4).getReg();
2568 case TargetOpcode::G_UADDE:
2569 Opcode = TargetOpcode::G_UADDE;
2570 ExtOpcode = TargetOpcode::G_ZEXT;
2571 CarryIn =
MI.getOperand(4).getReg();
2573 case TargetOpcode::G_USUBE:
2574 Opcode = TargetOpcode::G_USUBE;
2575 ExtOpcode = TargetOpcode::G_ZEXT;
2576 CarryIn =
MI.getOperand(4).getReg();
2592 auto LHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(2)});
2593 auto RHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(3)});
2597 LLT CarryOutTy = MRI.getType(
MI.getOperand(1).getReg());
2599 .buildInstr(Opcode, {WideTy, CarryOutTy},
2600 {LHSExt, RHSExt, *CarryIn})
2603 NewOp =
MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).
getReg(0);
2605 LLT OrigTy = MRI.getType(
MI.getOperand(0).getReg());
2606 auto TruncOp =
MIRBuilder.buildTrunc(OrigTy, NewOp);
2607 auto ExtOp =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2612 MI.eraseFromParent();
2617LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2619 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2620 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2621 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2622 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2623 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2636 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2643 auto ShiftK =
MIRBuilder.buildConstant(WideTy, SHLAmount);
2647 auto WideInst =
MIRBuilder.buildInstr(
MI.getOpcode(), {WideTy},
2648 {ShiftL, ShiftR},
MI.getFlags());
2653 :
MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2656 MI.eraseFromParent();
2661LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2670 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2672 LLT SrcTy = MRI.getType(
LHS);
2673 LLT OverflowTy = MRI.getType(OriginalOverflow);
2680 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2681 auto LeftOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
LHS});
2682 auto RightOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
RHS});
2689 WideMulCanOverflow ?
MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2691 MachineInstrBuilder Mulo;
2692 if (WideMulCanOverflow)
2693 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2694 {LeftOperand, RightOperand});
2696 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2701 MachineInstrBuilder ExtResult;
2708 ExtResult =
MIRBuilder.buildSExtInReg(WideTy,
Mul, SrcBitWidth);
2712 ExtResult =
MIRBuilder.buildZExtInReg(WideTy,
Mul, SrcBitWidth);
2715 if (WideMulCanOverflow) {
2723 MI.eraseFromParent();
2729 unsigned Opcode =
MI.getOpcode();
2733 case TargetOpcode::G_ATOMICRMW_XCHG:
2734 case TargetOpcode::G_ATOMICRMW_ADD:
2735 case TargetOpcode::G_ATOMICRMW_SUB:
2736 case TargetOpcode::G_ATOMICRMW_AND:
2737 case TargetOpcode::G_ATOMICRMW_OR:
2738 case TargetOpcode::G_ATOMICRMW_XOR:
2739 case TargetOpcode::G_ATOMICRMW_MIN:
2740 case TargetOpcode::G_ATOMICRMW_MAX:
2741 case TargetOpcode::G_ATOMICRMW_UMIN:
2742 case TargetOpcode::G_ATOMICRMW_UMAX:
2743 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2749 case TargetOpcode::G_ATOMIC_CMPXCHG:
2750 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2757 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2767 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2772 case TargetOpcode::G_EXTRACT:
2773 return widenScalarExtract(
MI, TypeIdx, WideTy);
2774 case TargetOpcode::G_INSERT:
2775 return widenScalarInsert(
MI, TypeIdx, WideTy);
2776 case TargetOpcode::G_MERGE_VALUES:
2777 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2778 case TargetOpcode::G_UNMERGE_VALUES:
2779 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2780 case TargetOpcode::G_SADDO:
2781 case TargetOpcode::G_SSUBO:
2782 case TargetOpcode::G_UADDO:
2783 case TargetOpcode::G_USUBO:
2784 case TargetOpcode::G_SADDE:
2785 case TargetOpcode::G_SSUBE:
2786 case TargetOpcode::G_UADDE:
2787 case TargetOpcode::G_USUBE:
2788 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2789 case TargetOpcode::G_UMULO:
2790 case TargetOpcode::G_SMULO:
2791 return widenScalarMulo(
MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_SADDSAT:
2793 case TargetOpcode::G_SSUBSAT:
2794 case TargetOpcode::G_SSHLSAT:
2795 case TargetOpcode::G_UADDSAT:
2796 case TargetOpcode::G_USUBSAT:
2797 case TargetOpcode::G_USHLSAT:
2798 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_CTTZ:
2800 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2801 case TargetOpcode::G_CTLZ:
2802 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2803 case TargetOpcode::G_CTLS:
2804 case TargetOpcode::G_CTPOP: {
2817 case TargetOpcode::G_CTTZ:
2818 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2819 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2820 ExtOpc = TargetOpcode::G_ANYEXT;
2822 case TargetOpcode::G_CTLS:
2823 ExtOpc = TargetOpcode::G_SEXT;
2826 ExtOpc = TargetOpcode::G_ZEXT;
2829 auto MIBSrc =
MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2830 LLT CurTy = MRI.getType(SrcReg);
2831 unsigned NewOpc = Opcode;
2832 if (NewOpc == TargetOpcode::G_CTTZ) {
2839 WideTy, MIBSrc,
MIRBuilder.buildConstant(WideTy, TopBit));
2841 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2846 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2856 auto MIBNewOp =
MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2858 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2861 WideTy, MIBNewOp,
MIRBuilder.buildConstant(WideTy, SizeDiff));
2864 MIRBuilder.buildZExtOrTrunc(
MI.getOperand(0), MIBNewOp);
2865 MI.eraseFromParent();
2868 case TargetOpcode::G_BSWAP: {
2872 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2873 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2874 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2877 MI.getOperand(0).setReg(DstExt);
2881 LLT Ty = MRI.getType(DstReg);
2883 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2884 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2890 case TargetOpcode::G_BITREVERSE: {
2894 LLT Ty = MRI.getType(DstReg);
2897 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2899 MI.getOperand(0).setReg(DstExt);
2902 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, DiffBits);
2903 auto Shift =
MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2908 case TargetOpcode::G_FREEZE:
2909 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2916 case TargetOpcode::G_ABS:
2923 case TargetOpcode::G_ADD:
2924 case TargetOpcode::G_AND:
2925 case TargetOpcode::G_MUL:
2926 case TargetOpcode::G_OR:
2927 case TargetOpcode::G_XOR:
2928 case TargetOpcode::G_SUB:
2929 case TargetOpcode::G_SHUFFLE_VECTOR:
2940 case TargetOpcode::G_SBFX:
2941 case TargetOpcode::G_UBFX:
2955 case TargetOpcode::G_SHL:
2971 case TargetOpcode::G_ROTR:
2972 case TargetOpcode::G_ROTL:
2981 case TargetOpcode::G_SDIV:
2982 case TargetOpcode::G_SREM:
2983 case TargetOpcode::G_SMIN:
2984 case TargetOpcode::G_SMAX:
2985 case TargetOpcode::G_ABDS:
2993 case TargetOpcode::G_SDIVREM:
3003 case TargetOpcode::G_ASHR:
3004 case TargetOpcode::G_LSHR:
3008 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3009 : TargetOpcode::G_ZEXT;
3022 case TargetOpcode::G_UDIV:
3023 case TargetOpcode::G_UREM:
3024 case TargetOpcode::G_ABDU:
3031 case TargetOpcode::G_UDIVREM:
3040 case TargetOpcode::G_UMIN:
3041 case TargetOpcode::G_UMAX: {
3042 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3044 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3048 ? TargetOpcode::G_SEXT
3049 : TargetOpcode::G_ZEXT;
3059 case TargetOpcode::G_SELECT:
3069 bool IsVec = MRI.getType(
MI.getOperand(1).getReg()).isVector();
3076 case TargetOpcode::G_FPEXT:
3084 case TargetOpcode::G_FPTOSI:
3085 case TargetOpcode::G_FPTOUI:
3086 case TargetOpcode::G_INTRINSIC_LRINT:
3087 case TargetOpcode::G_INTRINSIC_LLRINT:
3088 case TargetOpcode::G_IS_FPCLASS:
3098 case TargetOpcode::G_SITOFP:
3108 case TargetOpcode::G_UITOFP:
3118 case TargetOpcode::G_FPTOSI_SAT:
3119 case TargetOpcode::G_FPTOUI_SAT:
3124 LLT Ty = MRI.getType(OldDst);
3125 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3127 MI.getOperand(0).setReg(ExtReg);
3128 uint64_t ShortBits = Ty.getScalarSizeInBits();
3131 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3142 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3143 NewDst =
MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3151 NewDst =
MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3159 case TargetOpcode::G_LOAD:
3160 case TargetOpcode::G_SEXTLOAD:
3161 case TargetOpcode::G_ZEXTLOAD:
3167 case TargetOpcode::G_STORE: {
3171 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3172 assert(!Ty.isPointerOrPointerVector() &&
"Can't widen type");
3173 if (!Ty.isScalar()) {
3181 MI.setMemRefs(MF, {NewMMO});
3188 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3189 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3195 case TargetOpcode::G_CONSTANT: {
3198 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3199 MRI.getType(
MI.getOperand(0).getReg()));
3200 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3201 ExtOpc == TargetOpcode::G_ANYEXT) &&
3204 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3208 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3214 case TargetOpcode::G_FCONSTANT: {
3220 auto IntCst =
MIRBuilder.buildConstant(
MI.getOperand(0).getReg(), Val);
3222 MI.eraseFromParent();
3225 case TargetOpcode::G_IMPLICIT_DEF: {
3231 case TargetOpcode::G_BRCOND:
3237 case TargetOpcode::G_FCMP:
3248 case TargetOpcode::G_ICMP:
3253 LLT SrcTy = MRI.getType(
MI.getOperand(2).getReg());
3257 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3258 unsigned ExtOpcode =
3262 ? TargetOpcode::G_SEXT
3263 : TargetOpcode::G_ZEXT;
3270 case TargetOpcode::G_PTR_ADD:
3271 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3277 case TargetOpcode::G_PHI: {
3278 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3281 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3293 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3296 LLT VecTy = MRI.getType(VecReg);
3302 TargetOpcode::G_ANYEXT);
3316 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3332 LLT VecTy = MRI.getType(VecReg);
3351 case TargetOpcode::G_FADD:
3352 case TargetOpcode::G_FMUL:
3353 case TargetOpcode::G_FSUB:
3354 case TargetOpcode::G_FMA:
3355 case TargetOpcode::G_FMAD:
3356 case TargetOpcode::G_FNEG:
3357 case TargetOpcode::G_FABS:
3358 case TargetOpcode::G_FCANONICALIZE:
3359 case TargetOpcode::G_FMINNUM:
3360 case TargetOpcode::G_FMAXNUM:
3361 case TargetOpcode::G_FMINNUM_IEEE:
3362 case TargetOpcode::G_FMAXNUM_IEEE:
3363 case TargetOpcode::G_FMINIMUM:
3364 case TargetOpcode::G_FMAXIMUM:
3365 case TargetOpcode::G_FMINIMUMNUM:
3366 case TargetOpcode::G_FMAXIMUMNUM:
3367 case TargetOpcode::G_FDIV:
3368 case TargetOpcode::G_FREM:
3369 case TargetOpcode::G_FCEIL:
3370 case TargetOpcode::G_FFLOOR:
3371 case TargetOpcode::G_FCOS:
3372 case TargetOpcode::G_FSIN:
3373 case TargetOpcode::G_FTAN:
3374 case TargetOpcode::G_FACOS:
3375 case TargetOpcode::G_FASIN:
3376 case TargetOpcode::G_FATAN:
3377 case TargetOpcode::G_FATAN2:
3378 case TargetOpcode::G_FCOSH:
3379 case TargetOpcode::G_FSINH:
3380 case TargetOpcode::G_FTANH:
3381 case TargetOpcode::G_FLOG10:
3382 case TargetOpcode::G_FLOG:
3383 case TargetOpcode::G_FLOG2:
3384 case TargetOpcode::G_FRINT:
3385 case TargetOpcode::G_FNEARBYINT:
3386 case TargetOpcode::G_FSQRT:
3387 case TargetOpcode::G_FEXP:
3388 case TargetOpcode::G_FEXP2:
3389 case TargetOpcode::G_FEXP10:
3390 case TargetOpcode::G_FPOW:
3391 case TargetOpcode::G_INTRINSIC_TRUNC:
3392 case TargetOpcode::G_INTRINSIC_ROUND:
3393 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3397 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3403 case TargetOpcode::G_FMODF: {
3413 case TargetOpcode::G_FPOWI:
3414 case TargetOpcode::G_FLDEXP:
3415 case TargetOpcode::G_STRICT_FLDEXP: {
3417 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3438 case TargetOpcode::G_FFREXP: {
3451 case TargetOpcode::G_LROUND:
3452 case TargetOpcode::G_LLROUND:
3463 case TargetOpcode::G_INTTOPTR:
3471 case TargetOpcode::G_PTRTOINT:
3479 case TargetOpcode::G_BUILD_VECTOR: {
3483 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3489 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3497 case TargetOpcode::G_SEXT_INREG:
3506 case TargetOpcode::G_PTRMASK: {
3514 case TargetOpcode::G_VECREDUCE_ADD: {
3523 case TargetOpcode::G_VECREDUCE_FADD:
3524 case TargetOpcode::G_VECREDUCE_FMUL:
3525 case TargetOpcode::G_VECREDUCE_FMIN:
3526 case TargetOpcode::G_VECREDUCE_FMAX:
3527 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3528 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3533 LLT VecTy = MRI.getType(VecReg);
3540 case TargetOpcode::G_VSCALE: {
3547 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3552 case TargetOpcode::G_SPLAT_VECTOR: {
3561 case TargetOpcode::G_INSERT_SUBVECTOR: {
3569 LLT SubVecTy = MRI.getType(SubVec);
3573 auto BigZExt =
MIRBuilder.buildZExt(WideTy, BigVec);
3574 auto SubZExt =
MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3575 auto WideInsert =
MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3579 auto SplatZero =
MIRBuilder.buildSplatVector(
3584 MI.eraseFromParent();
3593 auto Unmerge =
B.buildUnmerge(Ty, Src);
3594 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
3603 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3605 LLT DstLLT =
MRI.getType(DstReg);
3617 MIRBuilder.
buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3626 MI.eraseFromParent();
3637 MI.eraseFromParent();
3644 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3645 if (SrcTy.isVector()) {
3649 if (DstTy.isVector()) {
3650 int NumDstElt = DstTy.getNumElements();
3651 int NumSrcElt = SrcTy.getNumElements();
3654 LLT DstCastTy = DstEltTy;
3655 LLT SrcPartTy = SrcEltTy;
3659 if (NumSrcElt < NumDstElt) {
3670 SrcPartTy = SrcEltTy;
3671 }
else if (NumSrcElt > NumDstElt) {
3683 DstCastTy = DstEltTy;
3688 SrcReg =
MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3692 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3693 MI.eraseFromParent();
3697 if (DstTy.isVector()) {
3700 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3701 MI.eraseFromParent();
3717 unsigned NewEltSize,
3718 unsigned OldEltSize) {
3719 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3720 LLT IdxTy =
B.getMRI()->getType(Idx);
3723 auto OffsetMask =
B.buildConstant(
3725 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
3726 return B.buildShl(IdxTy, OffsetIdx,
3727 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3742 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] =
MI.getFirst3RegLLTs();
3746 unsigned OldNumElts = SrcVecTy.getNumElements();
3753 if (NewNumElts > OldNumElts) {
3764 if (NewNumElts % OldNumElts != 0)
3768 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3772 auto NewEltsPerOldEltK =
MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3775 auto NewBaseIdx =
MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3777 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3778 auto IdxOffset =
MIRBuilder.buildConstant(IdxTy,
I);
3779 auto TmpIdx =
MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3780 auto Elt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3781 NewOps[
I] = Elt.getReg(0);
3784 auto NewVec =
MIRBuilder.buildBuildVector(MidTy, NewOps);
3786 MI.eraseFromParent();
3790 if (NewNumElts < OldNumElts) {
3791 if (NewEltSize % OldEltSize != 0)
3813 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3814 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3817 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3821 WideElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3822 ScaledIdx).getReg(0);
3830 auto ExtractedBits =
MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3832 MI.eraseFromParent();
3846 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3847 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3848 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3849 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3852 auto EltMask =
B.buildConstant(
3856 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3857 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3860 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3864 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3878 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3879 MI.getFirst4RegLLTs();
3891 if (NewNumElts < OldNumElts) {
3892 if (NewEltSize % OldEltSize != 0)
3901 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3902 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3905 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3909 ExtractedElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3910 ScaledIdx).getReg(0);
3920 InsertedElt =
MIRBuilder.buildInsertVectorElement(
3921 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3925 MI.eraseFromParent();
3955 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3959 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3960 return UnableToLegalize;
3965 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3967 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3976 MI.eraseFromParent();
3994 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3995 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4005 auto Inp1 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4006 auto Inp2 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4008 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4009 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4011 MI.eraseFromParent();
4041 LLT DstTy = MRI.getType(Dst);
4042 LLT SrcTy = MRI.getType(Src);
4048 if (DstTy == CastTy)
4056 if (CastEltSize < DstEltSize)
4059 auto AdjustAmt = CastEltSize / DstEltSize;
4060 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4061 SrcTyMinElts % AdjustAmt != 0)
4066 auto CastVec =
MIRBuilder.buildBitcast(SrcTy, Src);
4067 auto PromotedES =
MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4070 ES->eraseFromParent();
4105 LLT DstTy = MRI.getType(Dst);
4106 LLT BigVecTy = MRI.getType(BigVec);
4107 LLT SubVecTy = MRI.getType(SubVec);
4109 if (DstTy == CastTy)
4124 if (CastEltSize < DstEltSize)
4127 auto AdjustAmt = CastEltSize / DstEltSize;
4128 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4129 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4135 auto CastBigVec =
MIRBuilder.buildBitcast(BigVecTy, BigVec);
4136 auto CastSubVec =
MIRBuilder.buildBitcast(SubVecTy, SubVec);
4138 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4141 ES->eraseFromParent();
4149 LLT DstTy = MRI.getType(DstReg);
4157 if (MemSizeInBits != MemStoreSizeInBits) {
4174 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4178 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4179 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4181 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4184 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4186 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4189 if (DstTy != LoadTy)
4197 if (
MIRBuilder.getDataLayout().isBigEndian())
4215 uint64_t LargeSplitSize, SmallSplitSize;
4220 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4227 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4230 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4241 if (Alignment.
value() * 8 > MemSizeInBits &&
4246 auto NewLoad =
MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4263 LLT PtrTy = MRI.getType(PtrReg);
4266 auto LargeLoad =
MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4270 LargeSplitSize / 8);
4271 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4272 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4274 SmallPtr, *SmallMMO);
4276 auto ShiftAmt =
MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4277 auto Shift =
MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4279 if (AnyExtTy == DstTy)
4280 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4282 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4286 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4306 LLT SrcTy = MRI.getType(SrcReg);
4314 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4320 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4322 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4326 auto ZextInReg =
MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4330 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4345 uint64_t LargeSplitSize, SmallSplitSize;
4352 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4355 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4364 if (SrcTy.isPointer()) {
4366 SrcReg =
MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4369 auto ExtVal =
MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4372 auto ShiftAmt =
MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4373 auto SmallVal =
MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4376 LLT PtrTy = MRI.getType(PtrReg);
4379 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4385 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4386 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4395 LLT SrcTy = MRI.getType(SrcReg);
4401 assert(SrcTy.isVector() &&
"Expect a vector store type");
4408 auto CurrVal =
MIRBuilder.buildConstant(IntTy, 0);
4412 auto Elt =
MIRBuilder.buildExtractVectorElement(
4413 SrcTy.getElementType(), SrcReg,
MIRBuilder.buildConstant(IdxTy,
I));
4414 auto Trunc =
MIRBuilder.buildTrunc(MemScalarTy, Elt);
4415 auto ZExt =
MIRBuilder.buildZExt(IntTy, Trunc);
4421 auto Shifted =
MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4422 CurrVal =
MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4426 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4437 switch (
MI.getOpcode()) {
4438 case TargetOpcode::G_LOAD: {
4456 case TargetOpcode::G_STORE: {
4472 case TargetOpcode::G_SELECT: {
4476 if (MRI.getType(
MI.getOperand(1).getReg()).isVector()) {
4478 dbgs() <<
"bitcast action not implemented for vector select\n");
4489 case TargetOpcode::G_AND:
4490 case TargetOpcode::G_OR:
4491 case TargetOpcode::G_XOR: {
4499 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4501 case TargetOpcode::G_INSERT_VECTOR_ELT:
4503 case TargetOpcode::G_CONCAT_VECTORS:
4505 case TargetOpcode::G_SHUFFLE_VECTOR:
4507 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4509 case TargetOpcode::G_INSERT_SUBVECTOR:
4517void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4527 switch(
MI.getOpcode()) {
4530 case TargetOpcode::G_FCONSTANT:
4532 case TargetOpcode::G_BITCAST:
4534 case TargetOpcode::G_SREM:
4535 case TargetOpcode::G_UREM: {
4536 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4538 MIRBuilder.buildInstr(
MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4539 {MI.getOperand(1), MI.getOperand(2)});
4541 auto Prod =
MIRBuilder.buildMul(Ty, Quot,
MI.getOperand(2));
4543 MI.eraseFromParent();
4546 case TargetOpcode::G_SADDO:
4547 case TargetOpcode::G_SSUBO:
4549 case TargetOpcode::G_SADDE:
4551 case TargetOpcode::G_SSUBE:
4553 case TargetOpcode::G_UMULH:
4554 case TargetOpcode::G_SMULH:
4556 case TargetOpcode::G_SMULO:
4557 case TargetOpcode::G_UMULO: {
4560 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4561 LLT Ty = MRI.getType(Res);
4563 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4564 ? TargetOpcode::G_SMULH
4565 : TargetOpcode::G_UMULH;
4569 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4570 MI.removeOperand(1);
4573 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4581 if (Opcode == TargetOpcode::G_SMULH) {
4582 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4583 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4590 case TargetOpcode::G_FNEG: {
4591 auto [Res, SubByReg] =
MI.getFirst2Regs();
4592 LLT Ty = MRI.getType(Res);
4596 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4597 MI.eraseFromParent();
4600 case TargetOpcode::G_FSUB:
4601 case TargetOpcode::G_STRICT_FSUB: {
4602 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4603 LLT Ty = MRI.getType(Res);
4608 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4609 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4613 MI.eraseFromParent();
4616 case TargetOpcode::G_FMAD:
4618 case TargetOpcode::G_FFLOOR:
4620 case TargetOpcode::G_LROUND:
4621 case TargetOpcode::G_LLROUND: {
4624 LLT SrcTy = MRI.getType(SrcReg);
4625 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4628 MI.eraseFromParent();
4631 case TargetOpcode::G_INTRINSIC_ROUND:
4633 case TargetOpcode::G_FRINT: {
4636 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4639 case TargetOpcode::G_INTRINSIC_LRINT:
4640 case TargetOpcode::G_INTRINSIC_LLRINT: {
4643 LLT SrcTy = MRI.getType(SrcReg);
4645 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4647 MI.eraseFromParent();
4650 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4651 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4652 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4653 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4654 **
MI.memoperands_begin());
4656 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4657 MI.eraseFromParent();
4660 case TargetOpcode::G_LOAD:
4661 case TargetOpcode::G_SEXTLOAD:
4662 case TargetOpcode::G_ZEXTLOAD:
4664 case TargetOpcode::G_STORE:
4666 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4667 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4668 case TargetOpcode::G_CTLZ:
4669 case TargetOpcode::G_CTTZ:
4670 case TargetOpcode::G_CTPOP:
4671 case TargetOpcode::G_CTLS:
4674 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4676 Register NewRes = MRI.cloneVirtualRegister(Res);
4683 MI.eraseFromParent();
4687 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4688 const LLT CondTy = MRI.getType(CarryOut);
4689 const LLT Ty = MRI.getType(Res);
4691 Register NewRes = MRI.cloneVirtualRegister(Res);
4694 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4700 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4701 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4708 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4713 MI.eraseFromParent();
4717 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4722 MI.eraseFromParent();
4726 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4727 const LLT CondTy = MRI.getType(BorrowOut);
4728 const LLT Ty = MRI.getType(Res);
4731 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4737 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4738 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4745 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4746 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4748 MI.eraseFromParent();
4786 case G_MERGE_VALUES:
4788 case G_UNMERGE_VALUES:
4790 case TargetOpcode::G_SEXT_INREG: {
4791 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4792 int64_t SizeInBits =
MI.getOperand(2).getImm();
4794 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4795 LLT DstTy = MRI.getType(DstReg);
4796 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4799 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4800 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4801 MI.eraseFromParent();
4804 case G_EXTRACT_VECTOR_ELT:
4805 case G_INSERT_VECTOR_ELT:
4807 case G_SHUFFLE_VECTOR:
4809 case G_VECTOR_COMPRESS:
4811 case G_DYN_STACKALLOC:
4815 case G_STACKRESTORE:
4825 case G_READ_REGISTER:
4826 case G_WRITE_REGISTER:
4833 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4834 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4840 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4845 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4856 bool IsSigned =
MI.getOpcode() == G_ABDS;
4857 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4858 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4859 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4883 case G_MEMCPY_INLINE:
4884 return lowerMemcpyInline(
MI);
4895 case G_ATOMICRMW_SUB: {
4896 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
4897 const LLT ValTy = MRI.getType(Val);
4901 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4902 MI.eraseFromParent();
4925 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4929 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4935 Align StackTypeAlign =
4942 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4943 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4948 LLT IdxTy =
B.getMRI()->getType(IdxReg);
4960 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
4963 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
4974 "Converting bits to bytes lost precision");
4980 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4981 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
4983 if (IdxTy != MRI.getType(Index))
4984 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4989 LLT PtrTy = MRI.getType(VecPtr);
4990 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
4998 std::initializer_list<unsigned> NonVecOpIndices) {
4999 if (
MI.getNumMemOperands() != 0)
5002 LLT VecTy =
MRI.getType(
MI.getReg(0));
5016 if (!Ty.isVector()) {
5022 if (Ty.getNumElements() != NumElts)
5037 assert(Ty.isVector() &&
"Expected vector type");
5039 int NumParts, NumLeftover;
5040 std::tie(NumParts, NumLeftover) =
5043 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5044 for (
int i = 0; i < NumParts; ++i) {
5049 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5058 for (
unsigned i = 0; i <
N; ++i) {
5060 Ops.push_back(
Op.getReg());
5061 else if (
Op.isImm())
5062 Ops.push_back(
Op.getImm());
5063 else if (
Op.isPredicate())
5085 std::initializer_list<unsigned> NonVecOpIndices) {
5087 "Non-compatible opcode or not specified non-vector operands");
5088 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5090 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5091 unsigned NumDefs =
MI.getNumDefs();
5099 for (
unsigned i = 0; i < NumDefs; ++i) {
5100 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5108 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5109 ++UseIdx, ++UseNo) {
5112 MI.getOperand(UseIdx));
5121 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5125 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5127 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5128 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5131 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5132 Uses.push_back(InputOpsPieces[InputNo][i]);
5135 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5136 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5141 for (
unsigned i = 0; i < NumDefs; ++i)
5142 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5144 for (
unsigned i = 0; i < NumDefs; ++i)
5145 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5148 MI.eraseFromParent();
5155 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5157 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5158 unsigned NumDefs =
MI.getNumDefs();
5162 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5167 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5168 UseIdx += 2, ++UseNo) {
5176 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5178 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5179 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5181 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5184 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5185 Phi.addUse(InputOpsPieces[j][i]);
5186 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5196 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5198 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5201 MI.eraseFromParent();
5209 const int NumDst =
MI.getNumOperands() - 1;
5210 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5211 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5212 LLT SrcTy = MRI.getType(SrcReg);
5214 if (TypeIdx != 1 || NarrowTy == DstTy)
5221 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5224 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5238 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5239 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5240 const int PartsPerUnmerge = NumDst / NumUnmerge;
5242 for (
int I = 0;
I != NumUnmerge; ++
I) {
5243 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5245 for (
int J = 0; J != PartsPerUnmerge; ++J)
5246 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5247 MIB.addUse(Unmerge.getReg(
I));
5250 MI.eraseFromParent();
5257 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5261 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5263 if (NarrowTy == SrcTy)
5271 assert(SrcTy.isVector() &&
"Expected vector types");
5273 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5287 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5288 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5289 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5295 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5296 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5297 ++i,
Offset += NumNarrowTyElts) {
5300 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5303 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5304 MI.eraseFromParent();
5308 assert(TypeIdx == 0 &&
"Bad type index");
5309 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5324 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5325 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5327 for (
unsigned i = 0; i < NumParts; ++i) {
5329 for (
unsigned j = 0; j < NumElts; ++j)
5330 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5332 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5335 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5336 MI.eraseFromParent();
5344 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5346 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5348 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5350 InsertVal =
MI.getOperand(2).getReg();
5352 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5353 LLT VecTy = MRI.getType(SrcVec);
5359 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5363 MI.eraseFromParent();
5372 SplitPieces[IdxVal] = InsertVal;
5373 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5375 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5379 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5382 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5383 TargetOpcode::G_ANYEXT);
5387 LLT IdxTy = MRI.getType(Idx);
5388 int64_t PartIdx = IdxVal / NewNumElts;
5390 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5393 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5396 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5397 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5398 VecParts[PartIdx] = InsertPart.getReg(0);
5402 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5404 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5408 MI.eraseFromParent();
5428 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5440 LLT ValTy = MRI.getType(ValReg);
5449 int NumLeftover = -1;
5455 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5457 NumParts = NarrowRegs.
size();
5458 NumLeftover = NarrowLeftoverRegs.
size();
5465 LLT PtrTy = MRI.getType(AddrReg);
5468 unsigned TotalSize = ValTy.getSizeInBits();
5475 auto MMO = LdStMI.
getMMO();
5477 unsigned NumParts,
unsigned Offset) ->
unsigned {
5480 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5482 unsigned ByteOffset =
Offset / 8;
5485 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5492 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5493 ValRegs.push_back(Dst);
5494 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5496 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5505 unsigned HandledOffset =
5506 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5510 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5513 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5514 LeftoverTy, NarrowLeftoverRegs);
5528 switch (
MI.getOpcode()) {
5529 case G_IMPLICIT_DEF:
5545 case G_FCANONICALIZE:
5562 case G_INTRINSIC_LRINT:
5563 case G_INTRINSIC_LLRINT:
5564 case G_INTRINSIC_ROUND:
5565 case G_INTRINSIC_ROUNDEVEN:
5568 case G_INTRINSIC_TRUNC:
5596 case G_FMINNUM_IEEE:
5597 case G_FMAXNUM_IEEE:
5619 case G_CTLZ_ZERO_UNDEF:
5621 case G_CTTZ_ZERO_UNDEF:
5637 case G_ADDRSPACE_CAST:
5650 case G_STRICT_FLDEXP:
5652 case G_TRUNC_SSAT_S:
5653 case G_TRUNC_SSAT_U:
5654 case G_TRUNC_USAT_U:
5662 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5667 case G_UNMERGE_VALUES:
5669 case G_BUILD_VECTOR:
5670 assert(TypeIdx == 0 &&
"not a vector type index");
5672 case G_CONCAT_VECTORS:
5676 case G_EXTRACT_VECTOR_ELT:
5677 case G_INSERT_VECTOR_ELT:
5686 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5687 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5689 case G_SHUFFLE_VECTOR:
5695 case G_INTRINSIC_FPTRUNC_ROUND:
5705 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5706 "Not a bitcast operation");
5711 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5713 unsigned NewElemCount =
5716 if (NewElemCount == 1) {
5719 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5726 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5735 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5736 MI.eraseFromParent();
5742 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5746 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5747 MI.getFirst3RegLLTs();
5750 if (DstTy != Src1Ty)
5752 if (DstTy != Src2Ty)
5767 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5783 unsigned InputUsed[2] = {-1U, -1U};
5784 unsigned FirstMaskIdx =
High * NewElts;
5785 bool UseBuildVector =
false;
5786 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5788 int Idx = Mask[FirstMaskIdx + MaskOffset];
5793 if (
Input >= std::size(Inputs)) {
5800 Idx -=
Input * NewElts;
5804 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5805 if (InputUsed[OpNo] ==
Input) {
5808 }
else if (InputUsed[OpNo] == -1U) {
5810 InputUsed[OpNo] =
Input;
5815 if (OpNo >= std::size(InputUsed)) {
5818 UseBuildVector =
true;
5823 Ops.push_back(Idx + OpNo * NewElts);
5826 if (UseBuildVector) {
5831 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5833 int Idx = Mask[FirstMaskIdx + MaskOffset];
5838 if (
Input >= std::size(Inputs)) {
5845 Idx -=
Input * NewElts;
5849 .buildExtractVectorElement(
5850 EltTy, Inputs[
Input],
5856 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5857 }
else if (InputUsed[0] == -1U) {
5859 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5860 }
else if (NewElts == 1) {
5861 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5863 Register Op0 = Inputs[InputUsed[0]];
5867 : Inputs[InputUsed[1]];
5869 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5876 MI.eraseFromParent();
5889 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5895 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5898 const unsigned NumParts =
5900 : SrcTy.getNumElements();
5904 if (DstTy != NarrowTy)
5910 unsigned NumPartsLeft = NumParts;
5911 while (NumPartsLeft > 1) {
5912 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5915 .buildInstr(ScalarOpc, {NarrowTy},
5916 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5919 SplitSrcs = PartialResults;
5920 PartialResults.
clear();
5921 NumPartsLeft = SplitSrcs.
size();
5925 MI.eraseFromParent();
5930 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
5931 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5934 MI.eraseFromParent();
5938 for (
unsigned Part = 0; Part < NumParts; ++Part) {
5940 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5948 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5951 Register Acc = PartialReductions[0];
5952 for (
unsigned Part = 1; Part < NumParts; ++Part) {
5953 if (Part == NumParts - 1) {
5955 {Acc, PartialReductions[Part]});
5958 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5962 MI.eraseFromParent();
5968 unsigned int TypeIdx,
5970 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5971 MI.getFirst3RegLLTs();
5972 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5976 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5977 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5978 "Unexpected vecreduce opcode");
5979 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5980 ? TargetOpcode::G_FADD
5981 : TargetOpcode::G_FMUL;
5984 unsigned NumParts = SrcTy.getNumElements();
5987 for (
unsigned i = 0; i < NumParts; i++)
5988 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5992 MI.eraseFromParent();
5999 unsigned ScalarOpc) {
6007 while (SplitSrcs.
size() > 1) {
6009 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6017 SplitSrcs = std::move(PartialRdxs);
6021 MI.getOperand(1).setReg(SplitSrcs[0]);
6028 const LLT HalfTy,
const LLT AmtTy) {
6030 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6031 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6035 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6036 MI.eraseFromParent();
6042 unsigned VTBits = 2 * NVTBits;
6045 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6046 if (Amt.
ugt(VTBits)) {
6048 }
else if (Amt.
ugt(NVTBits)) {
6051 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6052 }
else if (Amt == NVTBits) {
6060 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6063 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6064 if (Amt.
ugt(VTBits)) {
6066 }
else if (Amt.
ugt(NVTBits)) {
6068 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6070 }
else if (Amt == NVTBits) {
6074 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6076 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6078 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6084 if (Amt.
ugt(VTBits)) {
6086 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6087 }
else if (Amt.
ugt(NVTBits)) {
6089 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6091 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6092 }
else if (Amt == NVTBits) {
6095 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6097 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6099 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6101 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6108 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6109 MI.eraseFromParent();
6125 LLT DstTy = MRI.getType(DstReg);
6130 LLT ShiftAmtTy = MRI.getType(Amt);
6132 if (DstEltSize % 2 != 0)
6148 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6159 const unsigned NewBitSize = DstEltSize / 2;
6171 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6173 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6174 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6177 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6178 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6180 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6185 switch (
MI.getOpcode()) {
6186 case TargetOpcode::G_SHL: {
6188 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6190 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6191 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6192 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6195 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6196 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6198 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6200 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6202 ResultRegs[0] =
Lo.getReg(0);
6203 ResultRegs[1] =
Hi.getReg(0);
6206 case TargetOpcode::G_LSHR:
6207 case TargetOpcode::G_ASHR: {
6209 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6211 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6212 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6213 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6217 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6220 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6221 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6223 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6227 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6229 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6231 ResultRegs[0] =
Lo.getReg(0);
6232 ResultRegs[1] =
Hi.getReg(0);
6239 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6240 MI.eraseFromParent();
6249 LLT TargetTy,
LLT ShiftAmtTy) {
6252 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6254 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6255 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6256 const bool NeedsInterWordShift = ShiftBits != 0;
6259 case TargetOpcode::G_SHL: {
6262 if (PartIdx < ShiftWords)
6265 unsigned SrcIdx = PartIdx - ShiftWords;
6266 if (!NeedsInterWordShift)
6267 return SrcParts[SrcIdx];
6272 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6276 return Hi.getReg(0);
6279 case TargetOpcode::G_LSHR: {
6280 unsigned SrcIdx = PartIdx + ShiftWords;
6281 if (SrcIdx >= NumParts)
6283 if (!NeedsInterWordShift)
6284 return SrcParts[SrcIdx];
6288 if (SrcIdx + 1 < NumParts) {
6289 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6293 return Lo.getReg(0);
6296 case TargetOpcode::G_ASHR: {
6298 unsigned SrcIdx = PartIdx + ShiftWords;
6299 if (SrcIdx >= NumParts)
6301 if (!NeedsInterWordShift)
6302 return SrcParts[SrcIdx];
6307 (SrcIdx == NumParts - 1)
6311 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6333 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6334 ?
static_cast<unsigned>(TargetOpcode::G_LSHR)
6339 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6348 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6349 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6351 auto IsZeroBitShift =
6359 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6360 : TargetOpcode::G_SHL;
6363 auto TargetBitsConst =
6365 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6370 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6375 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6377 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6381 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6394 LLT DstTy = MRI.getType(DstReg);
6398 const unsigned NumParts = DstBits / TargetBits;
6400 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6410 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6411 MI.eraseFromParent();
6416 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6417 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6423 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6427 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6430 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6431 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6435 for (
unsigned I = 0;
I < NumParts; ++
I)
6437 Params, TargetTy, ShiftAmtTy);
6439 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6440 MI.eraseFromParent();
6449 LLT DstTy = MRI.getType(DstReg);
6450 LLT ShiftAmtTy = MRI.getType(AmtReg);
6454 const unsigned NumParts = DstBits / TargetBits;
6456 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6473 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6485 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6486 auto TargetBitsLog2Const =
6487 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6488 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6491 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6493 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6501 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6502 auto TargetBitsMinusOneConst =
6503 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6505 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6506 TargetBitsMinusOneConst)
6509 FillValue = ZeroReg;
6517 for (
unsigned I = 0;
I < NumParts; ++
I) {
6519 Register InBoundsResult = FillValue;
6529 for (
unsigned K = 0; K < NumParts; ++K) {
6530 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6532 WordShift, WordShiftKConst);
6544 switch (
MI.getOpcode()) {
6545 case TargetOpcode::G_SHL:
6546 MainSrcIdx = (int)
I - (
int)K;
6547 CarrySrcIdx = MainSrcIdx - 1;
6549 case TargetOpcode::G_LSHR:
6550 case TargetOpcode::G_ASHR:
6551 MainSrcIdx = (int)
I + (
int)K;
6552 CarrySrcIdx = MainSrcIdx + 1;
6560 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6561 Register MainOp = SrcParts[MainSrcIdx];
6565 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6566 CarryOp = SrcParts[CarrySrcIdx];
6567 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6568 CarrySrcIdx >= (
int)NumParts)
6569 CarryOp = FillValue;
6575 ResultForK = FillValue;
6581 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6588 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6592 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6593 MI.eraseFromParent();
6600 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6603 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6618 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6623 "getNeutralElementForVecReduce called with invalid opcode!");
6624 case TargetOpcode::G_VECREDUCE_ADD:
6625 case TargetOpcode::G_VECREDUCE_OR:
6626 case TargetOpcode::G_VECREDUCE_XOR:
6627 case TargetOpcode::G_VECREDUCE_UMAX:
6629 case TargetOpcode::G_VECREDUCE_MUL:
6631 case TargetOpcode::G_VECREDUCE_AND:
6632 case TargetOpcode::G_VECREDUCE_UMIN:
6635 case TargetOpcode::G_VECREDUCE_SMAX:
6638 case TargetOpcode::G_VECREDUCE_SMIN:
6641 case TargetOpcode::G_VECREDUCE_FADD:
6643 case TargetOpcode::G_VECREDUCE_FMUL:
6645 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6646 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6647 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6648 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6656 unsigned Opc =
MI.getOpcode();
6658 case TargetOpcode::G_IMPLICIT_DEF:
6659 case TargetOpcode::G_LOAD: {
6667 case TargetOpcode::G_STORE:
6674 case TargetOpcode::G_AND:
6675 case TargetOpcode::G_OR:
6676 case TargetOpcode::G_XOR:
6677 case TargetOpcode::G_ADD:
6678 case TargetOpcode::G_SUB:
6679 case TargetOpcode::G_MUL:
6680 case TargetOpcode::G_FADD:
6681 case TargetOpcode::G_FSUB:
6682 case TargetOpcode::G_FMUL:
6683 case TargetOpcode::G_FDIV:
6684 case TargetOpcode::G_FCOPYSIGN:
6685 case TargetOpcode::G_UADDSAT:
6686 case TargetOpcode::G_USUBSAT:
6687 case TargetOpcode::G_SADDSAT:
6688 case TargetOpcode::G_SSUBSAT:
6689 case TargetOpcode::G_SMIN:
6690 case TargetOpcode::G_SMAX:
6691 case TargetOpcode::G_UMIN:
6692 case TargetOpcode::G_UMAX:
6693 case TargetOpcode::G_FMINNUM:
6694 case TargetOpcode::G_FMAXNUM:
6695 case TargetOpcode::G_FMINNUM_IEEE:
6696 case TargetOpcode::G_FMAXNUM_IEEE:
6697 case TargetOpcode::G_FMINIMUM:
6698 case TargetOpcode::G_FMAXIMUM:
6699 case TargetOpcode::G_FMINIMUMNUM:
6700 case TargetOpcode::G_FMAXIMUMNUM:
6701 case TargetOpcode::G_STRICT_FADD:
6702 case TargetOpcode::G_STRICT_FSUB:
6703 case TargetOpcode::G_STRICT_FMUL: {
6711 case TargetOpcode::G_SHL:
6712 case TargetOpcode::G_ASHR:
6713 case TargetOpcode::G_LSHR: {
6719 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6725 case TargetOpcode::G_FMA:
6726 case TargetOpcode::G_STRICT_FMA:
6727 case TargetOpcode::G_FSHR:
6728 case TargetOpcode::G_FSHL: {
6737 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6738 case TargetOpcode::G_EXTRACT:
6745 case TargetOpcode::G_INSERT:
6746 case TargetOpcode::G_INSERT_VECTOR_ELT:
6747 case TargetOpcode::G_FREEZE:
6748 case TargetOpcode::G_FNEG:
6749 case TargetOpcode::G_FABS:
6750 case TargetOpcode::G_FSQRT:
6751 case TargetOpcode::G_FCEIL:
6752 case TargetOpcode::G_FFLOOR:
6753 case TargetOpcode::G_FNEARBYINT:
6754 case TargetOpcode::G_FRINT:
6755 case TargetOpcode::G_INTRINSIC_ROUND:
6756 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6757 case TargetOpcode::G_INTRINSIC_TRUNC:
6758 case TargetOpcode::G_BITREVERSE:
6759 case TargetOpcode::G_BSWAP:
6760 case TargetOpcode::G_FCANONICALIZE:
6761 case TargetOpcode::G_SEXT_INREG:
6762 case TargetOpcode::G_ABS:
6763 case TargetOpcode::G_CTLZ:
6764 case TargetOpcode::G_CTPOP:
6772 case TargetOpcode::G_SELECT: {
6773 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6775 if (!CondTy.isScalar() ||
6781 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6783 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6788 if (CondTy.isVector())
6798 case TargetOpcode::G_UNMERGE_VALUES:
6800 case TargetOpcode::G_PHI:
6802 case TargetOpcode::G_SHUFFLE_VECTOR:
6804 case TargetOpcode::G_BUILD_VECTOR: {
6806 for (
auto Op :
MI.uses()) {
6814 MIRBuilder.buildDeleteTrailingVectorElements(
6815 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6816 MI.eraseFromParent();
6819 case TargetOpcode::G_SEXT:
6820 case TargetOpcode::G_ZEXT:
6821 case TargetOpcode::G_ANYEXT:
6822 case TargetOpcode::G_TRUNC:
6823 case TargetOpcode::G_FPTRUNC:
6824 case TargetOpcode::G_FPEXT:
6825 case TargetOpcode::G_FPTOSI:
6826 case TargetOpcode::G_FPTOUI:
6827 case TargetOpcode::G_FPTOSI_SAT:
6828 case TargetOpcode::G_FPTOUI_SAT:
6829 case TargetOpcode::G_SITOFP:
6830 case TargetOpcode::G_UITOFP: {
6837 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6840 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6848 case TargetOpcode::G_ICMP:
6849 case TargetOpcode::G_FCMP: {
6857 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6862 case TargetOpcode::G_BITCAST: {
6866 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6867 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
6883 case TargetOpcode::G_VECREDUCE_FADD:
6884 case TargetOpcode::G_VECREDUCE_FMUL:
6885 case TargetOpcode::G_VECREDUCE_ADD:
6886 case TargetOpcode::G_VECREDUCE_MUL:
6887 case TargetOpcode::G_VECREDUCE_AND:
6888 case TargetOpcode::G_VECREDUCE_OR:
6889 case TargetOpcode::G_VECREDUCE_XOR:
6890 case TargetOpcode::G_VECREDUCE_SMAX:
6891 case TargetOpcode::G_VECREDUCE_SMIN:
6892 case TargetOpcode::G_VECREDUCE_UMAX:
6893 case TargetOpcode::G_VECREDUCE_UMIN: {
6894 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
6896 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6897 auto NeutralElement = getNeutralElementForVecReduce(
6903 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
6904 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6905 NeutralElement, Idx);
6909 MO.
setReg(NewVec.getReg(0));
6921 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6923 unsigned MaskNumElts = Mask.size();
6924 unsigned SrcNumElts = SrcTy.getNumElements();
6927 if (MaskNumElts == SrcNumElts)
6930 if (MaskNumElts < SrcNumElts) {
6938 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
6939 MI.getOperand(1).getReg(),
6940 MI.getOperand(2).getReg(), NewMask);
6941 MI.eraseFromParent();
6946 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
6947 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6956 MOps1[0] =
MI.getOperand(1).getReg();
6957 MOps2[0] =
MI.getOperand(2).getReg();
6959 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6960 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6964 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
6966 if (Idx >=
static_cast<int>(SrcNumElts))
6967 Idx += PaddedMaskNumElts - SrcNumElts;
6972 if (MaskNumElts != PaddedMaskNumElts) {
6974 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6977 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
6979 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
6984 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6987 MI.eraseFromParent();
6993 unsigned int TypeIdx,
LLT MoreTy) {
6994 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
6996 unsigned NumElts = DstTy.getNumElements();
6999 if (DstTy.isVector() && Src1Ty.isVector() &&
7000 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7008 if (DstTy != Src1Ty || DstTy != Src2Ty)
7016 for (
unsigned I = 0;
I != NumElts; ++
I) {
7018 if (Idx <
static_cast<int>(NumElts))
7021 NewMask[
I] = Idx - NumElts + WidenNumElts;
7025 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7026 MI.getOperand(1).getReg(),
7027 MI.getOperand(2).getReg(), NewMask);
7028 MI.eraseFromParent();
7037 unsigned SrcParts = Src1Regs.
size();
7038 unsigned DstParts = DstRegs.
size();
7040 unsigned DstIdx = 0;
7042 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7043 DstRegs[DstIdx] = FactorSum;
7048 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7050 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7051 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7053 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7057 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7058 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7060 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7070 if (DstIdx != DstParts - 1) {
7071 MachineInstrBuilder Uaddo =
7072 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
7073 FactorSum = Uaddo.
getReg(0);
7074 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7075 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7076 MachineInstrBuilder Uaddo =
7077 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
7078 FactorSum = Uaddo.
getReg(0);
7079 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7080 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7084 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7085 for (
unsigned i = 2; i < Factors.
size(); ++i)
7086 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7089 CarrySumPrevDstIdx = CarrySum;
7090 DstRegs[DstIdx] = FactorSum;
7102 LLT DstType = MRI.getType(DstReg);
7104 if (DstType.isVector())
7107 unsigned Opcode =
MI.getOpcode();
7108 unsigned OpO, OpE, OpF;
7110 case TargetOpcode::G_SADDO:
7111 case TargetOpcode::G_SADDE:
7112 case TargetOpcode::G_UADDO:
7113 case TargetOpcode::G_UADDE:
7114 case TargetOpcode::G_ADD:
7115 OpO = TargetOpcode::G_UADDO;
7116 OpE = TargetOpcode::G_UADDE;
7117 OpF = TargetOpcode::G_UADDE;
7118 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7119 OpF = TargetOpcode::G_SADDE;
7121 case TargetOpcode::G_SSUBO:
7122 case TargetOpcode::G_SSUBE:
7123 case TargetOpcode::G_USUBO:
7124 case TargetOpcode::G_USUBE:
7125 case TargetOpcode::G_SUB:
7126 OpO = TargetOpcode::G_USUBO;
7127 OpE = TargetOpcode::G_USUBE;
7128 OpF = TargetOpcode::G_USUBE;
7129 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7130 OpF = TargetOpcode::G_SSUBE;
7137 unsigned NumDefs =
MI.getNumExplicitDefs();
7138 Register Src1 =
MI.getOperand(NumDefs).getReg();
7139 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7142 CarryDst =
MI.getOperand(1).getReg();
7143 if (
MI.getNumOperands() == NumDefs + 3)
7144 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7146 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7147 LLT LeftoverTy, DummyTy;
7149 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7154 int NarrowParts = Src1Regs.
size();
7155 Src1Regs.
append(Src1Left);
7156 Src2Regs.
append(Src2Left);
7159 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7161 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7164 if (i == e - 1 && CarryDst)
7165 CarryOut = CarryDst;
7167 CarryOut = MRI.createGenericVirtualRegister(
LLT::scalar(1));
7170 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7171 {Src1Regs[i], Src2Regs[i]});
7172 }
else if (i == e - 1) {
7173 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7174 {Src1Regs[i], Src2Regs[i], CarryIn});
7176 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7177 {Src1Regs[i], Src2Regs[i], CarryIn});
7183 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7184 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7185 ArrayRef(DstRegs).drop_front(NarrowParts));
7187 MI.eraseFromParent();
7193 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7195 LLT Ty = MRI.getType(DstReg);
7199 unsigned Size = Ty.getSizeInBits();
7201 if (
Size % NarrowSize != 0)
7204 unsigned NumParts =
Size / NarrowSize;
7205 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7206 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7212 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7216 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7217 MI.eraseFromParent();
7227 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7230 LLT SrcTy = MRI.getType(Src);
7241 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7254 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7257 if (SizeOp1 % NarrowSize != 0)
7259 int NumParts = SizeOp1 / NarrowSize;
7262 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7266 uint64_t OpStart =
MI.getOperand(2).getImm();
7267 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7268 for (
int i = 0; i < NumParts; ++i) {
7269 unsigned SrcStart = i * NarrowSize;
7271 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7274 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7282 int64_t ExtractOffset;
7284 if (OpStart < SrcStart) {
7286 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7288 ExtractOffset = OpStart - SrcStart;
7289 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7293 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7295 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7296 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7303 if (MRI.getType(DstReg).isVector())
7304 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7305 else if (DstRegs.
size() > 1)
7306 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7309 MI.eraseFromParent();
7321 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7323 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7326 SrcRegs.
append(LeftoverRegs);
7330 uint64_t OpStart =
MI.getOperand(3).getImm();
7331 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7332 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7333 unsigned DstStart =
I * NarrowSize;
7335 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7343 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7345 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7349 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7357 int64_t ExtractOffset, InsertOffset;
7359 if (OpStart < DstStart) {
7361 ExtractOffset = DstStart - OpStart;
7362 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7364 InsertOffset = OpStart - DstStart;
7367 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7371 if (ExtractOffset != 0 || SegSize != OpSize) {
7373 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7374 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7377 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7378 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7386 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7389 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7391 MI.eraseFromParent();
7399 LLT DstTy = MRI.getType(DstReg);
7401 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7407 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7408 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7412 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7413 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7416 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7417 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7418 {Src0Regs[I], Src1Regs[I]});
7422 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7425 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7426 DstLeftoverRegs.
push_back(Inst.getReg(0));
7429 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7430 LeftoverTy, DstLeftoverRegs);
7432 MI.eraseFromParent();
7442 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7444 LLT DstTy = MRI.getType(DstReg);
7449 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7450 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7451 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7453 MI.eraseFromParent();
7463 Register CondReg =
MI.getOperand(1).getReg();
7464 LLT CondTy = MRI.getType(CondReg);
7469 LLT DstTy = MRI.getType(DstReg);
7475 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7476 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7480 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7481 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7484 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7486 CondReg, Src1Regs[
I], Src2Regs[
I]);
7490 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7492 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7496 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7497 LeftoverTy, DstLeftoverRegs);
7499 MI.eraseFromParent();
7509 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7512 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7513 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7516 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7518 auto C_0 =
B.buildConstant(NarrowTy, 0);
7520 UnmergeSrc.getReg(1), C_0);
7521 auto LoCTLZ = IsUndef ?
7522 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7523 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7524 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7525 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7526 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7527 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7529 MI.eraseFromParent();
7542 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7545 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7546 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7549 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7551 auto C_0 =
B.buildConstant(NarrowTy, 0);
7553 UnmergeSrc.getReg(0), C_0);
7554 auto HiCTTZ = IsUndef ?
7555 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7556 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7557 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7558 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7559 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7560 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7562 MI.eraseFromParent();
7575 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7578 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7583 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7587 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7588 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7596 auto LoInv =
B.buildXor(DstTy,
Lo, Sign);
7597 auto LoCTLZ =
B.buildCTLZ(DstTy, LoInv);
7600 auto C_NarrowSizeM1 =
B.buildConstant(DstTy, NarrowSize - 1);
7601 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7603 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7605 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7607 MI.eraseFromParent();
7617 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7620 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7621 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7623 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7624 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7625 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7627 MI.eraseFromParent();
7642 LLT ExpTy = MRI.getType(ExpReg);
7647 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7648 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7649 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7650 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7652 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7654 MI.getOperand(2).setReg(Trunc.getReg(0));
7661 unsigned Opc =
MI.getOpcode();
7664 auto QAction = LI.getAction(Q).Action;
7670 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7673 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7677 case TargetOpcode::G_CTLZ: {
7678 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7679 unsigned Len = SrcTy.getScalarSizeInBits();
7681 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7683 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7684 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7687 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7688 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7689 MI.eraseFromParent();
7705 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7706 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7709 Op = MIBOp.getReg(0);
7714 MI.eraseFromParent();
7717 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7720 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7724 case TargetOpcode::G_CTTZ: {
7725 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7727 unsigned Len = SrcTy.getScalarSizeInBits();
7728 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7731 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7732 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7735 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7736 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7737 MI.eraseFromParent();
7744 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7745 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7747 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7748 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7749 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7750 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7753 MI.eraseFromParent();
7757 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7758 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7762 case TargetOpcode::G_CTPOP: {
7764 LLT Ty = MRI.getType(SrcReg);
7765 unsigned Size = Ty.getScalarSizeInBits();
7777 auto C_1 =
B.buildConstant(Ty, 1);
7778 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7780 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7781 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7782 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7786 auto C_2 =
B.buildConstant(Ty, 2);
7787 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7789 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7790 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7791 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7792 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7799 auto C_4 =
B.buildConstant(Ty, 4);
7800 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7801 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7803 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7804 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7806 assert(
Size <= 128 &&
"Scalar size is too large for CTPOP lower algorithm");
7809 if (
Size == 16 && !Ty.isVector()) {
7811 auto C_8 =
B.buildConstant(Ty, 8);
7812 auto HighSum =
B.buildLShr(Ty, B8Count, C_8);
7813 auto Res =
B.buildAdd(Ty, B8Count, HighSum);
7814 B.buildAnd(
MI.getOperand(0).getReg(), Res,
B.buildConstant(Ty, 0xFF));
7815 MI.eraseFromParent();
7824 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7826 auto IsMulSupported = [
this](
const LLT Ty) {
7827 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7830 if (IsMulSupported(Ty)) {
7831 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7832 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7834 auto ResTmp = B8Count;
7835 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7836 auto ShiftC =
B.buildConstant(Ty, Shift);
7837 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7838 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7840 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7842 MI.eraseFromParent();
7845 case TargetOpcode::G_CTLS: {
7846 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7850 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7851 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7853 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7859 MI.eraseFromParent();
7880 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7881 LLT Ty = MRI.getType(Dst);
7882 LLT ShTy = MRI.getType(Z);
7889 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7890 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7895 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
7896 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7900 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7913 MI.eraseFromParent();
7919 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7920 LLT Ty = MRI.getType(Dst);
7921 LLT ShTy = MRI.getType(Z);
7924 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7934 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
7935 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7936 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7937 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7938 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7942 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
7945 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7948 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7950 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
7951 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7952 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7955 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7957 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
7959 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7962 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7963 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
7968 MI.eraseFromParent();
7979 LLT Ty = MRI.getType(Dst);
7980 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
7982 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7983 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7986 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
7987 return lowerFunnelShiftAsShifts(
MI);
7991 if (Result == UnableToLegalize)
7992 return lowerFunnelShiftAsShifts(
MI);
7997 auto [Dst, Src] =
MI.getFirst2Regs();
7998 LLT DstTy = MRI.getType(Dst);
7999 LLT SrcTy = MRI.getType(Src);
8003 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8011 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8015 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8019 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8024 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8025 {UnmergeSrc.getReg(0)});
8026 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8027 {UnmergeSrc.getReg(1)});
8030 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8032 MI.eraseFromParent();
8049 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8053 LLT DstTy = MRI.getType(DstReg);
8054 LLT SrcTy = MRI.getType(SrcReg);
8062 SrcTy.getElementCount().divideCoefficientBy(2));
8075 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8087 MI.eraseFromParent();
8096 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8097 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8098 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8099 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8100 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8101 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8102 MI.eraseFromParent();
8107 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8109 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8110 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8115 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8116 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8118 return lowerRotateWithReverseRotate(
MI);
8121 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8122 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8123 bool IsFShLegal =
false;
8124 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8125 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8129 MI.eraseFromParent();
8134 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8137 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8142 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8143 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8144 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8150 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8151 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8153 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8159 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8160 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8162 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8164 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8169 MI.eraseFromParent();
8177 auto [Dst, Src] =
MI.getFirst2Regs();
8182 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8210 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8223 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8227 MI.eraseFromParent();
8235 auto [Dst, Src] =
MI.getFirst2Regs();
8240 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8253 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8255 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8260 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8262 MI.eraseFromParent();
8270 auto [Dst, Src] =
MI.getFirst2Regs();
8274 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S64);
8285 auto TwoP52 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4330000000000000));
8286 auto TwoP84 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4530000000000000));
8288 auto TwoP52P84FP =
MIRBuilder.buildFConstant(
S64, TwoP52P84);
8295 auto HighBitsFP =
MIRBuilder.buildOr(
S64, TwoP84, HighBits);
8296 auto Scratch =
MIRBuilder.buildFSub(
S64, HighBitsFP, TwoP52P84FP);
8297 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8299 MI.eraseFromParent();
8309 auto M1 =
MI.getOpcode() == TargetOpcode::G_UITOFP
8315 MI.eraseFromParent();
8320 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8323 auto True =
MIRBuilder.buildFConstant(DstTy, 1.0);
8324 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8325 MIRBuilder.buildSelect(Dst, Src, True, False);
8326 MI.eraseFromParent();
8330 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8350 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8357 auto True =
MIRBuilder.buildFConstant(DstTy, -1.0);
8358 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8359 MIRBuilder.buildSelect(Dst, Src, True, False);
8360 MI.eraseFromParent();
8364 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8387 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8388 MI.eraseFromParent();
8396 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8400 if (SrcTy !=
S64 && SrcTy !=
S32)
8402 if (DstTy !=
S32 && DstTy !=
S64)
8429 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8431 MI.eraseFromParent();
8436 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8441 if (SrcTy.getScalarType() !=
S32 || DstTy.getScalarType() !=
S64)
8448 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8450 auto ExponentMask =
MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8451 auto ExponentLoBit =
MIRBuilder.buildConstant(SrcTy, 23);
8453 auto AndExpMask =
MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8454 auto ExponentBits =
MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8456 auto SignMask =
MIRBuilder.buildConstant(SrcTy,
8458 auto AndSignMask =
MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8459 auto SignLowBit =
MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8460 auto Sign =
MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8463 auto MantissaMask =
MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8464 auto AndMantissaMask =
MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8465 auto K =
MIRBuilder.buildConstant(SrcTy, 0x00800000);
8467 auto R =
MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8470 auto Bias =
MIRBuilder.buildConstant(SrcTy, 127);
8475 auto Shl =
MIRBuilder.buildShl(DstTy, R, SubExponent);
8476 auto Srl =
MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8482 R =
MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8484 auto XorSign =
MIRBuilder.buildXor(DstTy, R, Sign);
8485 auto Ret =
MIRBuilder.buildSub(DstTy, XorSign, Sign);
8487 auto ZeroSrcTy =
MIRBuilder.buildConstant(SrcTy, 0);
8492 auto ZeroDstTy =
MIRBuilder.buildConstant(DstTy, 0);
8493 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8495 MI.eraseFromParent();
8501 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8503 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8504 unsigned SatWidth = DstTy.getScalarSizeInBits();
8508 APInt MinInt, MaxInt;
8531 if (AreExactFloatBounds) {
8533 auto MaxC =
MIRBuilder.buildFConstant(SrcTy, MinFloat);
8535 SrcTy.changeElementSize(1), Src, MaxC);
8536 auto Max =
MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8538 auto MinC =
MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8548 MI.eraseFromParent();
8553 auto FpToInt =
MIRBuilder.buildFPTOSI(DstTy, Min);
8555 DstTy.changeElementSize(1), Src, Src);
8558 MI.eraseFromParent();
8565 auto FpToInt = IsSigned ?
MIRBuilder.buildFPTOSI(DstTy, Src)
8574 DstTy, ULT,
MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8585 MI.eraseFromParent();
8591 DstTy, OGT,
MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8593 DstTy.changeElementSize(1), Src, Src);
8595 MI.eraseFromParent();
8605 auto [Dst, Src] =
MI.getFirst2Regs();
8607 MRI.getType(Src).getScalarType() ==
LLT::scalar(64));
8609 if (MRI.getType(Src).isVector())
8613 unsigned Flags =
MI.getFlags();
8616 MI.eraseFromParent();
8620 const unsigned ExpMask = 0x7ff;
8621 const unsigned ExpBiasf64 = 1023;
8622 const unsigned ExpBiasf16 = 15;
8651 auto SelectCC =
MIRBuilder.buildSelect(
S32, CmpM_NE0, Bits0x200, Zero);
8711 MI.eraseFromParent();
8717 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8721 if (DstTy.getScalarType() ==
S16 && SrcTy.getScalarType() ==
S64)
8728 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8729 LLT Ty = MRI.getType(Dst);
8731 auto CvtSrc1 =
MIRBuilder.buildSITOFP(Ty, Src1);
8732 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1,
MI.getFlags());
8733 MI.eraseFromParent();
8738 auto [DstFrac, DstInt, Src] =
MI.getFirst3Regs();
8739 LLT Ty = MRI.getType(Src);
8740 auto Flags =
MI.getFlags();
8747 FracToUse = FracPart.getReg(0);
8749 auto Abs =
MIRBuilder.buildFAbs(Ty, Src, Flags);
8753 Ty.changeElementSize(1), Abs, Inf);
8754 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
8756 FracToUse =
Select.getReg(0);
8759 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8762 MI.eraseFromParent();
8768 case TargetOpcode::G_SMIN:
8770 case TargetOpcode::G_SMAX:
8772 case TargetOpcode::G_UMIN:
8774 case TargetOpcode::G_UMAX:
8782 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8787 auto Cmp =
MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8788 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8790 MI.eraseFromParent();
8799 LLT DstTy = MRI.getType(Dst);
8800 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8810 auto Zero =
MIRBuilder.buildConstant(DstTy, 0);
8811 auto IsGT =
MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8813 auto IsLT =
MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8816 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
8817 auto BC = TLI.getBooleanContents(DstTy.
isVector(),
false);
8818 if (TLI.preferSelectsOverBooleanArithmetic(
8821 auto One =
MIRBuilder.buildConstant(DstTy, 1);
8822 auto SelectZeroOrOne =
MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8824 auto MinusOne =
MIRBuilder.buildConstant(DstTy, -1);
8825 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8831 unsigned BoolExtOp =
8833 IsGT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8834 IsLT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8838 MI.eraseFromParent();
8844 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
8845 const int Src0Size = Src0Ty.getScalarSizeInBits();
8846 const int Src1Size = Src1Ty.getScalarSizeInBits();
8851 auto NotSignBitMask =
MIRBuilder.buildConstant(
8856 if (Src0Ty == Src1Ty) {
8857 And1 =
MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8858 }
else if (Src0Size > Src1Size) {
8859 auto ShiftAmt =
MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8860 auto Zext =
MIRBuilder.buildZExt(Src0Ty, Src1);
8861 auto Shift =
MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8862 And1 =
MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8864 auto ShiftAmt =
MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8865 auto Shift =
MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8866 auto Trunc =
MIRBuilder.buildTrunc(Src0Ty, Shift);
8867 And1 =
MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8873 unsigned Flags =
MI.getFlags();
8880 MI.eraseFromParent();
8891 switch (
MI.getOpcode()) {
8892 case TargetOpcode::G_FMINNUM:
8893 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8895 case TargetOpcode::G_FMINIMUMNUM:
8896 NewOp = TargetOpcode::G_FMINNUM;
8898 case TargetOpcode::G_FMAXNUM:
8899 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8901 case TargetOpcode::G_FMAXIMUMNUM:
8902 NewOp = TargetOpcode::G_FMAXNUM;
8908 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8909 LLT Ty = MRI.getType(Dst);
8919 Src0 =
MIRBuilder.buildFCanonicalize(Ty, Src0,
MI.getFlags()).getReg(0);
8922 Src1 =
MIRBuilder.buildFCanonicalize(Ty, Src1,
MI.getFlags()).getReg(0);
8927 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1},
MI.getFlags());
8928 MI.eraseFromParent();
8934 unsigned Opc =
MI.getOpcode();
8935 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8936 LLT Ty = MRI.getType(Dst);
8939 bool IsMax = (
Opc == TargetOpcode::G_FMAXIMUM);
8941 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8942 unsigned OpcNonIeee =
8943 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8944 bool MinMaxMustRespectOrderedZero =
false;
8948 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8950 MinMaxMustRespectOrderedZero =
true;
8951 }
else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8956 Res =
MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8964 LLT ElementTy = Ty.
isScalar() ? Ty : Ty.getElementType();
8968 NaN =
MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8970 Res =
MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8980 const unsigned Flags =
MI.getFlags();
8986 auto LHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8988 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8990 auto RHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8992 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8994 Res =
MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8999 MI.eraseFromParent();
9006 LLT Ty = MRI.getType(DstReg);
9007 unsigned Flags =
MI.getFlags();
9012 MI.eraseFromParent();
9018 auto [DstReg,
X] =
MI.getFirst2Regs();
9019 const unsigned Flags =
MI.getFlags();
9020 const LLT Ty = MRI.getType(DstReg);
9032 auto AbsDiff =
MIRBuilder.buildFAbs(Ty, Diff, Flags);
9034 auto Half =
MIRBuilder.buildFConstant(Ty, 0.5);
9039 auto One =
MIRBuilder.buildFConstant(Ty, 1.0);
9040 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9041 auto BoolFP =
MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9042 auto SignedOffset =
MIRBuilder.buildFCopysign(Ty, BoolFP,
X);
9044 MIRBuilder.buildFAdd(DstReg,
T, SignedOffset, Flags);
9046 MI.eraseFromParent();
9051 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
9052 unsigned Flags =
MI.getFlags();
9053 LLT Ty = MRI.getType(DstReg);
9060 auto Trunc =
MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9061 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9064 SrcReg, Zero, Flags);
9066 SrcReg, Trunc, Flags);
9070 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9071 MI.eraseFromParent();
9077 const unsigned NumOps =
MI.getNumOperands();
9078 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
9079 unsigned PartSize = Src0Ty.getSizeInBits();
9084 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
9085 const unsigned Offset = (
I - 1) * PartSize;
9088 auto ZextInput =
MIRBuilder.buildZExt(WideTy, SrcReg);
9091 MRI.createGenericVirtualRegister(WideTy);
9094 auto Shl =
MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9095 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9096 ResultReg = NextResult;
9099 if (DstTy.isPointer()) {
9100 if (
MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9101 DstTy.getAddressSpace())) {
9109 MI.eraseFromParent();
9115 const unsigned NumDst =
MI.getNumOperands() - 1;
9116 Register SrcReg =
MI.getOperand(NumDst).getReg();
9117 Register Dst0Reg =
MI.getOperand(0).getReg();
9118 LLT DstTy = MRI.getType(Dst0Reg);
9127 LLT IntTy = MRI.getType(SrcReg);
9132 unsigned Offset = DstSize;
9133 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
9135 auto Shift =
MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9139 MI.eraseFromParent();
9158 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9159 InsertVal =
MI.getOperand(2).getReg();
9161 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
9163 LLT VecTy = MRI.getType(SrcVec);
9173 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
9174 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9176 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9179 MI.eraseFromParent();
9184 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
9195 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9202 int64_t
Offset = IdxVal * EltBytes;
9213 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9216 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9218 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9221 MI.eraseFromParent();
9227 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9228 MI.getFirst3RegLLTs();
9238 for (
int Idx : Mask) {
9240 if (!
Undef.isValid())
9246 assert(!Src0Ty.isScalar() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9248 int NumElts = Src0Ty.getNumElements();
9249 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9250 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9251 auto [It, Inserted] = CachedExtract.
try_emplace(Idx);
9253 auto IdxK =
MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9255 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9260 assert(DstTy.isVector() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9261 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9262 MI.eraseFromParent();
9268 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9269 MI.getFirst4RegLLTs();
9271 if (VecTy.isScalableVector())
9287 auto OutPos =
MIRBuilder.buildConstant(IdxTy, 0);
9290 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9293 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9296 std::optional<APInt> PassthruSplatVal =
9299 if (PassthruSplatVal.has_value()) {
9301 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9302 }
else if (HasPassthru) {
9303 auto Popcount =
MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9304 Popcount =
MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9310 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9314 unsigned NumElmts = VecTy.getNumElements();
9315 for (
unsigned I = 0;
I < NumElmts; ++
I) {
9317 auto Val =
MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9320 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9323 auto MaskI =
MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9328 OutPos =
MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9330 if (HasPassthru &&
I == NumElmts - 1) {
9333 auto AllLanesSelected =
MIRBuilder.buildICmp(
9335 OutPos =
MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9336 {OutPos, EndOfVector});
9340 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9342 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9347 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9349 MI.eraseFromParent();
9360 SPTmp =
MIRBuilder.buildCast(IntPtrTy, SPTmp);
9366 if (Alignment >
Align(1)) {
9369 auto AlignCst =
MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9378 const auto &MF = *
MI.getMF();
9384 Register AllocSize =
MI.getOperand(1).getReg();
9387 LLT PtrTy = MRI.getType(Dst);
9388 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9395 MI.eraseFromParent();
9401 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9406 MI.eraseFromParent();
9412 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9417 MI.eraseFromParent();
9423 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9424 unsigned Offset =
MI.getOperand(2).getImm();
9427 if (SrcTy.isVector()) {
9428 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9429 unsigned DstSize = DstTy.getSizeInBits();
9431 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9432 (
Offset + DstSize <= SrcTy.getSizeInBits())) {
9434 auto Unmerge =
MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9438 for (
unsigned Idx =
Offset / SrcEltSize;
9439 Idx < (
Offset + DstSize) / SrcEltSize; ++Idx) {
9440 SubVectorElts.
push_back(Unmerge.getReg(Idx));
9442 if (SubVectorElts.
size() == 1)
9443 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9445 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9447 MI.eraseFromParent();
9452 if (DstTy.isScalar() &&
9453 (SrcTy.isScalar() ||
9454 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9455 LLT SrcIntTy = SrcTy;
9456 if (!SrcTy.isScalar()) {
9458 SrcReg =
MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9465 auto Shr =
MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9469 MI.eraseFromParent();
9477 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
9480 LLT DstTy = MRI.getType(Src);
9481 LLT InsertTy = MRI.getType(InsertSrc);
9489 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9491 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, Src);
9495 for (; Idx <
Offset / EltSize; ++Idx) {
9496 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9501 auto UnmergeInsertSrc =
MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9502 for (
unsigned i = 0; Idx < (
Offset + InsertSize) / EltSize;
9504 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
9513 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9516 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9517 MI.eraseFromParent();
9531 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9535 LLT IntDstTy = DstTy;
9539 Src =
MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9544 InsertSrc =
MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9550 ExtInsSrc =
MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9556 auto Mask =
MIRBuilder.buildConstant(IntDstTy, MaskVal);
9557 auto MaskedSrc =
MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9558 auto Or =
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9561 MI.eraseFromParent();
9567 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9568 MI.getFirst4RegLLTs();
9569 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
9572 LLT BoolTy = Dst1Ty;
9574 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9591 auto ResultLowerThanLHS =
9596 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9599 MI.eraseFromParent();
9605 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9606 const LLT Ty = MRI.getType(Res);
9609 auto Tmp =
MIRBuilder.buildAdd(Ty, LHS, RHS);
9610 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9611 auto Sum =
MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9622 MI.eraseFromParent();
9627 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9628 const LLT Ty = MRI.getType(Res);
9631 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9632 auto RHSPlusCI =
MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9633 auto Diff =
MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9638 auto X2 =
MIRBuilder.buildXor(Ty, LHS, Diff);
9643 MI.eraseFromParent();
9649 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9650 LLT Ty = MRI.getType(Res);
9654 switch (
MI.getOpcode()) {
9657 case TargetOpcode::G_UADDSAT:
9660 BaseOp = TargetOpcode::G_ADD;
9662 case TargetOpcode::G_SADDSAT:
9665 BaseOp = TargetOpcode::G_ADD;
9667 case TargetOpcode::G_USUBSAT:
9670 BaseOp = TargetOpcode::G_SUB;
9672 case TargetOpcode::G_SSUBSAT:
9675 BaseOp = TargetOpcode::G_SUB;
9690 uint64_t NumBits = Ty.getScalarSizeInBits();
9701 auto NegOne =
MIRBuilder.buildConstant(Ty, -1);
9709 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9714 auto Min =
MIRBuilder.buildUMin(Ty, Not, RHS);
9715 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9718 MI.eraseFromParent();
9724 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9725 LLT Ty = MRI.getType(Res);
9729 unsigned OverflowOp;
9730 switch (
MI.getOpcode()) {
9733 case TargetOpcode::G_UADDSAT:
9736 OverflowOp = TargetOpcode::G_UADDO;
9738 case TargetOpcode::G_SADDSAT:
9741 OverflowOp = TargetOpcode::G_SADDO;
9743 case TargetOpcode::G_USUBSAT:
9746 OverflowOp = TargetOpcode::G_USUBO;
9748 case TargetOpcode::G_SSUBSAT:
9751 OverflowOp = TargetOpcode::G_SSUBO;
9756 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9757 Register Tmp = OverflowRes.getReg(0);
9758 Register Ov = OverflowRes.getReg(1);
9767 uint64_t NumBits = Ty.getScalarSizeInBits();
9768 auto ShiftAmount =
MIRBuilder.buildConstant(Ty, NumBits - 1);
9769 auto Sign =
MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9772 Clamp =
MIRBuilder.buildAdd(Ty, Sign, MinVal);
9780 Clamp =
MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9784 MI.eraseFromParent();
9790 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9791 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9792 "Expected shlsat opcode!");
9793 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9794 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9795 LLT Ty = MRI.getType(Res);
9799 auto Result =
MIRBuilder.buildShl(Ty, LHS, RHS);
9800 auto Orig = IsSigned ?
MIRBuilder.buildAShr(Ty, Result, RHS)
9809 SatVal =
MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9814 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9816 MI.eraseFromParent();
9821 auto [Dst, Src] =
MI.getFirst2Regs();
9822 const LLT Ty = MRI.getType(Src);
9823 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9824 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9827 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9828 auto LSByteShiftedLeft =
MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9829 auto MSByteShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9830 auto Res =
MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9833 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
9835 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9836 auto Mask =
MIRBuilder.buildConstant(Ty, APMask);
9837 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9839 auto LoByte =
MIRBuilder.buildAnd(Ty, Src, Mask);
9840 auto LoShiftedLeft =
MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9841 Res =
MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9843 auto SrcShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9844 auto HiShiftedRight =
MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9845 Res =
MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9847 Res.getInstr()->getOperand(0).setReg(Dst);
9849 MI.eraseFromParent();
9856 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
9859 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9860 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9861 return B.buildOr(Dst,
LHS,
RHS);
9866 auto [Dst, Src] =
MI.getFirst2Regs();
9867 const LLT SrcTy = MRI.getType(Src);
9868 unsigned Size = SrcTy.getScalarSizeInBits();
9869 unsigned VSize = SrcTy.getSizeInBits();
9872 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9873 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9874 {LLT::fixed_vector(VSize / 8, 8),
9875 LLT::fixed_vector(VSize / 8, 8)}}))) {
9880 auto BSWAP =
MIRBuilder.buildBSwap(SrcTy, Src);
9881 auto Cast =
MIRBuilder.buildBitcast(VTy, BSWAP);
9882 auto RBIT =
MIRBuilder.buildBitReverse(VTy, Cast);
9886 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9909 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
9913 Tmp2 = MIRBuilder.
buildShl(SrcTy, Src, ShAmt);
9916 Tmp2 = MIRBuilder.
buildLShr(SrcTy, Src, ShAmt);
9920 Tmp2 = MIRBuilder.
buildAnd(SrcTy, Tmp2, Mask);
9924 Tmp = MIRBuilder.
buildOr(SrcTy, Tmp, Tmp2);
9929 MI.eraseFromParent();
9937 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9938 int NameOpIdx = IsRead ? 1 : 0;
9939 int ValRegIndex = IsRead ? 0 : 1;
9941 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
9942 const LLT Ty = MRI.getType(ValReg);
9944 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9951 (IsRead ?
"llvm.read_register" :
"llvm.write_register"),
9952 Fn,
MI.getDebugLoc()));
9956 MI.eraseFromParent();
9965 MI.eraseFromParent();
9971 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
9972 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9974 LLT OrigTy = MRI.getType(Result);
9978 auto LHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(1)});
9979 auto RHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(2)});
9981 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9983 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, SizeInBits);
9984 auto Shifted =
MIRBuilder.buildInstr(ShiftOp, {WideTy}, {
Mul, ShiftAmt});
9987 MI.eraseFromParent();
9993 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9998 MI.eraseFromParent();
10003 MI.eraseFromParent();
10010 unsigned BitSize = SrcTy.getScalarSizeInBits();
10014 auto AsInt =
MIRBuilder.buildCopy(IntTy, SrcReg);
10020 APInt ExpMask = Inf;
10022 APInt QNaNBitMask =
10026 auto SignBitC =
MIRBuilder.buildConstant(IntTy, SignBit);
10027 auto ValueMaskC =
MIRBuilder.buildConstant(IntTy, ValueMask);
10028 auto InfC =
MIRBuilder.buildConstant(IntTy, Inf);
10029 auto ExpMaskC =
MIRBuilder.buildConstant(IntTy, ExpMask);
10030 auto ZeroC =
MIRBuilder.buildConstant(IntTy, 0);
10032 auto Abs =
MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10036 auto Res =
MIRBuilder.buildConstant(DstTy, 0);
10038 LLT DstTyCopy = DstTy;
10040 Res =
MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10068 auto ExpBits =
MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10071 Mask &= ~PartialCheck;
10080 else if (PartialCheck ==
fcZero)
10092 auto OneC =
MIRBuilder.buildConstant(IntTy, 1);
10093 auto VMinusOne =
MIRBuilder.buildSub(IntTy, V, OneC);
10094 auto SubnormalRes =
10096 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10098 SubnormalRes =
MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10099 appendToRes(SubnormalRes);
10106 else if (PartialCheck ==
fcInf)
10111 auto NegInfC =
MIRBuilder.buildConstant(IntTy, NegInf);
10118 auto InfWithQnanBitC =
MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10119 if (PartialCheck ==
fcNan) {
10123 }
else if (PartialCheck ==
fcQNan) {
10133 Abs, InfWithQnanBitC);
10134 appendToRes(
MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10141 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
10143 IntTy, Abs,
MIRBuilder.buildConstant(IntTy, ExpLSB));
10144 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10147 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10149 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10152 DstTy, Sign,
MIRBuilder.buildConstant(DstTy, InversionMask));
10153 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10155 appendToRes(NormalRes);
10159 MI.eraseFromParent();
10165 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10166 MI.getFirst4RegLLTs();
10168 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10172 Op1Reg =
MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10173 Op2Reg =
MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10177 if (MaskTy.isScalar()) {
10185 MaskElt =
MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10189 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10191 if (DstTy.isVector()) {
10193 auto ShufSplat =
MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10194 MaskReg = ShufSplat.getReg(0);
10199 }
else if (!DstTy.isVector()) {
10204 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10208 auto NotMask =
MIRBuilder.buildNot(MaskTy, MaskReg);
10209 auto NewOp1 =
MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10210 auto NewOp2 =
MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10217 MI.eraseFromParent();
10223 unsigned Opcode =
MI.getOpcode();
10226 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10227 : TargetOpcode::G_UDIV,
10228 {
MI.getOperand(0).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10230 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10231 : TargetOpcode::G_UREM,
10232 {
MI.getOperand(1).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10233 MI.eraseFromParent();
10243 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
10247 auto Shift =
MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10250 MI.eraseFromParent();
10260 Register SrcReg =
MI.getOperand(1).getReg();
10261 LLT Ty = MRI.getType(SrcReg);
10262 auto Zero =
MIRBuilder.buildConstant(Ty, 0);
10265 MI.eraseFromParent();
10271 Register SrcReg =
MI.getOperand(1).getReg();
10272 Register DestReg =
MI.getOperand(0).getReg();
10274 auto Zero =
MIRBuilder.buildConstant(Ty, 0).getReg(0);
10275 auto Sub =
MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10278 MI.eraseFromParent();
10284 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10285 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10286 "Expected G_ABDS or G_ABDU instruction");
10288 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10289 LLT Ty = MRI.getType(LHS);
10299 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10301 MI.eraseFromParent();
10307 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10308 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10309 "Expected G_ABDS or G_ABDU instruction");
10311 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10312 LLT Ty = MRI.getType(LHS);
10317 if (
MI.getOpcode() == TargetOpcode::G_ABDS) {
10318 MaxReg =
MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10319 MinReg =
MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10321 MaxReg =
MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10322 MinReg =
MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10324 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10326 MI.eraseFromParent();
10331 Register SrcReg =
MI.getOperand(1).getReg();
10332 Register DstReg =
MI.getOperand(0).getReg();
10334 LLT Ty = MRI.getType(DstReg);
10342 MI.eraseFromParent();
10348 Register SrcReg =
MI.getOperand(1).getReg();
10349 LLT SrcTy = MRI.getType(SrcReg);
10350 LLT DstTy = MRI.getType(SrcReg);
10353 if (SrcTy.isScalar()) {
10358 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::COPY));
10369 Register ListPtr =
MI.getOperand(1).getReg();
10370 LLT PtrTy = MRI.getType(ListPtr);
10377 auto VAList =
MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10379 const Align A(
MI.getOperand(2).getImm());
10381 if (
A > TLI.getMinStackArgumentAlignment()) {
10383 MIRBuilder.buildConstant(PtrTyAsScalarTy,
A.value() - 1).getReg(0);
10384 auto AddDst =
MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10385 auto AndDst =
MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst,
Log2(
A));
10386 VAList = AndDst.getReg(0);
10393 LLT LLTTy = MRI.getType(Dst);
10396 MIRBuilder.buildConstant(PtrTyAsScalarTy,
DL.getTypeAllocSize(Ty));
10397 auto Succ =
MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10402 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10404 Align EltAlignment =
DL.getABITypeAlign(Ty);
10407 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10409 MI.eraseFromParent();
10424 unsigned Limit,
const MemOp &
Op,
10425 unsigned DstAS,
unsigned SrcAS,
10426 const AttributeList &FuncAttributes,
10428 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
10438 if (
Op.isFixedDstAlign())
10439 while (
Op.getDstAlign() < Ty.getSizeInBytes() &&
10442 assert(Ty.getSizeInBits() > 0 &&
"Could not find valid type");
10446 unsigned NumMemOps = 0;
10449 unsigned TySize = Ty.getSizeInBytes();
10450 while (TySize >
Size) {
10459 assert(NewTySize > 0 &&
"Could not find appropriate type");
10466 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
10468 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
10474 TySize = NewTySize;
10478 if (++NumMemOps > Limit)
10481 MemOps.push_back(Ty);
10491 unsigned NumBits = Ty.getScalarSizeInBits();
10493 if (!Ty.isVector() && ValVRegAndVal) {
10494 APInt Scalar = ValVRegAndVal->Value.
trunc(8);
10502 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10523 uint64_t KnownLen,
Align Alignment,
10525 auto &MF = *
MI.getParent()->getParent();
10530 assert(KnownLen != 0 &&
"Have a zero length memset length!");
10532 bool DstAlignCanChange =
false;
10536 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10538 DstAlignCanChange =
true;
10540 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10541 std::vector<LLT> MemOps;
10543 const auto &DstMMO = **
MI.memoperands_begin();
10544 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10547 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10558 if (DstAlignCanChange) {
10561 Align NewAlign =
DL.getABITypeAlign(IRTy);
10562 if (NewAlign > Alignment) {
10563 Alignment = NewAlign;
10571 MachineIRBuilder MIB(
MI);
10573 LLT LargestTy = MemOps[0];
10574 for (
unsigned i = 1; i < MemOps.size(); i++)
10576 LargestTy = MemOps[i];
10588 LLT PtrTy = MRI.getType(Dst);
10589 unsigned DstOff = 0;
10590 unsigned Size = KnownLen;
10591 for (
unsigned I = 0;
I < MemOps.size();
I++) {
10592 LLT Ty = MemOps[
I];
10594 if (TySize >
Size) {
10597 assert(
I == MemOps.size() - 1 &&
I != 0);
10598 DstOff -= TySize -
Size;
10608 TLI.isTruncateFree(LargestVT, VT))
10609 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10622 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst,
Offset).getReg(0);
10625 MIB.buildStore(
Value, Ptr, *StoreMMO);
10630 MI.eraseFromParent();
10636 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10638 auto [Dst, Src, Len] =
MI.getFirst3Regs();
10640 const auto *MMOIt =
MI.memoperands_begin();
10642 bool IsVolatile =
MemOp->isVolatile();
10648 "inline memcpy with dynamic size is not yet supported");
10649 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10650 if (KnownLen == 0) {
10651 MI.eraseFromParent();
10655 const auto &DstMMO = **
MI.memoperands_begin();
10656 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10657 Align DstAlign = DstMMO.getBaseAlign();
10658 Align SrcAlign = SrcMMO.getBaseAlign();
10660 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10667 Align SrcAlign,
bool IsVolatile) {
10668 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10669 return lowerMemcpy(
MI, Dst, Src, KnownLen,
10670 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10677 Align SrcAlign,
bool IsVolatile) {
10678 auto &MF = *
MI.getParent()->getParent();
10683 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
10685 bool DstAlignCanChange =
false;
10687 Align Alignment = std::min(DstAlign, SrcAlign);
10691 DstAlignCanChange =
true;
10697 std::vector<LLT> MemOps;
10699 const auto &DstMMO = **
MI.memoperands_begin();
10700 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10706 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10712 if (DstAlignCanChange) {
10715 Align NewAlign =
DL.getABITypeAlign(IRTy);
10720 if (!
TRI->hasStackRealignment(MF))
10722 NewAlign = std::min(NewAlign, *StackAlign);
10724 if (NewAlign > Alignment) {
10725 Alignment = NewAlign;
10733 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
10735 MachineIRBuilder MIB(
MI);
10741 unsigned CurrOffset = 0;
10742 unsigned Size = KnownLen;
10743 for (
auto CopyTy : MemOps) {
10746 if (CopyTy.getSizeInBytes() >
Size)
10747 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
10758 if (CurrOffset != 0) {
10759 LLT SrcTy = MRI.getType(Src);
10762 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
10764 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10768 if (CurrOffset != 0) {
10769 LLT DstTy = MRI.getType(Dst);
10770 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
10772 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10773 CurrOffset += CopyTy.getSizeInBytes();
10774 Size -= CopyTy.getSizeInBytes();
10777 MI.eraseFromParent();
10783 uint64_t KnownLen,
Align DstAlign,
Align SrcAlign,
10785 auto &MF = *
MI.getParent()->getParent();
10790 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
10792 bool DstAlignCanChange =
false;
10795 Align Alignment = std::min(DstAlign, SrcAlign);
10797 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10799 DstAlignCanChange =
true;
10801 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10802 std::vector<LLT> MemOps;
10804 const auto &DstMMO = **
MI.memoperands_begin();
10805 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10806 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10807 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10814 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10820 if (DstAlignCanChange) {
10823 Align NewAlign =
DL.getABITypeAlign(IRTy);
10828 if (!
TRI->hasStackRealignment(MF))
10829 if (MaybeAlign StackAlign =
DL.getStackAlignment())
10830 NewAlign = std::min(NewAlign, *StackAlign);
10832 if (NewAlign > Alignment) {
10833 Alignment = NewAlign;
10841 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
10843 MachineIRBuilder MIB(
MI);
10847 unsigned CurrOffset = 0;
10848 SmallVector<Register, 16> LoadVals;
10849 for (
auto CopyTy : MemOps) {
10856 if (CurrOffset != 0) {
10857 LLT SrcTy = MRI.getType(Src);
10860 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
10862 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10863 CurrOffset += CopyTy.getSizeInBytes();
10867 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
10868 LLT CopyTy = MemOps[
I];
10874 if (CurrOffset != 0) {
10875 LLT DstTy = MRI.getType(Dst);
10878 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
10880 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
10883 MI.eraseFromParent();
10889 const unsigned Opc =
MI.getOpcode();
10892 assert((
Opc == TargetOpcode::G_MEMCPY ||
Opc == TargetOpcode::G_MEMMOVE ||
10893 Opc == TargetOpcode::G_MEMSET) &&
10894 "Expected memcpy like instruction");
10896 auto MMOIt =
MI.memoperands_begin();
10901 auto [Dst, Src, Len] =
MI.getFirst3Regs();
10903 if (
Opc != TargetOpcode::G_MEMSET) {
10904 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
10905 MemOp = *(++MMOIt);
10906 SrcAlign =
MemOp->getBaseAlign();
10911 if (!LenVRegAndVal)
10913 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10915 if (KnownLen == 0) {
10916 MI.eraseFromParent();
10920 if (MaxLen && KnownLen > MaxLen)
10923 bool IsVolatile =
MemOp->isVolatile();
10924 if (
Opc == TargetOpcode::G_MEMCPY) {
10925 auto &MF = *
MI.getParent()->getParent();
10928 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10929 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10932 if (
Opc == TargetOpcode::G_MEMMOVE)
10933 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10934 if (
Opc == TargetOpcode::G_MEMSET)
10935 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents an insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a three-way compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
constexpr LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elements.
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has no NaN elements.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.