#define DEBUG_TYPE "legalizer"
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;

  if (LeftoverSize == 0)
    return std::make_pair(NumParts, 0);

  if (LeftoverSize % EltSize != 0)
    return std::make_pair(-1, -1);

  return std::make_pair(NumParts, NumLeftover);
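
// For example, Size = 96 with NarrowSize = 32 gives NumParts = 3 and no
// leftover, while NarrowSize = 64 gives NumParts = 1 with 32 leftover bits.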
  switch (Ty.getSizeInBits()) {
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
    return bitcast(MI, Step.TypeIdx, Step.NewType);
    return lower(MI, Step.TypeIdx, Step.NewType);
    return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
                                                     : UnableToLegalize;
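
/// Merge the per-part registers back into the wide destination register,
/// including any odd-sized leftover pieces described by LeftoverTy.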
void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs, LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
    assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
    AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
    return mergeMixedSubvectors(DstReg, AllRegs);

  extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
  LLT Ty = MRI.getType(Reg);
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (!MRI.getType(Leftover).isVector())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {

  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);

  LLT SrcTy = MRI.getType(SrcReg);
  extractGCDType(Parts, GCDTy, SrcReg);
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  int NumOrigSrc = VRegs.size();

  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;

      SubMerge[J] = VRegs[Idx];
      AllMergePartsArePadding = false;

    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      Remerge[I] = AllPadReg;

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];

  VRegs = std::move(Remerge);
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  if (DstTy == LCMTy) {
    MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);

  auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);

  UnmergeDefs[0] = DstReg;
  for (unsigned I = 1; I != NumDefs; ++I)
    UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

  MIRBuilder.buildUnmerge(UnmergeDefs,
                          MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32: return RTLIB::LibcallPrefix##32;                                  \
    case 64: return RTLIB::LibcallPrefix##64;                                  \
    case 128: return RTLIB::LibcallPrefix##128;                                \
    default: llvm_unreachable("unexpected size");                              \
    }                                                                          \
  } while (0)

#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32: return RTLIB::LibcallPrefix##32;                                  \
    case 64: return RTLIB::LibcallPrefix##64;                                  \
    case 80: return RTLIB::LibcallPrefix##80;                                  \
    case 128: return RTLIB::LibcallPrefix##128;                                \
    default: llvm_unreachable("unexpected size");                              \
    }                                                                          \
  } while (0)
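
// For example, when Size is 64, RTLIBCASE(ADD_F) reduces to
// `return RTLIB::ADD_F64;`.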
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FSINCOS:
  case TargetOpcode::G_FMODF:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINIMUMNUM:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)

  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
  if (MI.getOpcode() == TargetOpcode::G_BZERO)

  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())

  if (Ret == MBB.instr_end() || !Ret->isReturn())

  if (Ret->getNumImplicitOperands() != 1)

  if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  Info.OrigRet = Result;
         (Result.Ty->isVoidTy() ||
          Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
           (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
           "Expected instr following MI to be return or debug inst?");
      Next->eraseFromParent();
    } while (MI->getNextNode());
  RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(Libcall);
  if (LibcallImpl == RTLIB::Unsupported)

  const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);

  Args.push_back({MO.getReg(), OpType, 0});
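
// G_FSINCOS is lowered to a sincos libcall: the call writes its two results
// through stack temporaries, which are then loaded back into the destination
// registers.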
  LLT DstTy = MRI.getType(DstSin);

  unsigned AddrSpace = DL.getAllocaAddrSpace();

  if (LibcallResult != LegalizeResult::Legalized)

  MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
  MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
  MI.eraseFromParent();
  LLT DstTy = MRI.getType(DstFrac);

  unsigned AddrSpace = DL.getAllocaAddrSpace();
  MachinePointerInfo PtrInfo;

      {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},

  if (LibcallResult != LegalizeResult::Legalized)

  MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
  MI.eraseFromParent();
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP:
  if (FromType->isIntegerTy()) {
    if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
      Arg.Flags[0].setSExt();
    else
      Arg.Flags[0].setZExt();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    Args.push_back({Reg, OpTy, 0});

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);

  if (RTLibcallImpl == RTLIB::Unsupported) {

  Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);

      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
           (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
           "Expected instr following MI to be return or debug inst?");
      Next->eraseFromParent();
    } while (MI.getNextNode());
  unsigned Opc = MI.getOpcode();
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();
    return RTLIB::UNKNOWN_LIBCALL;

#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A)                                                              \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
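
// Each LCALL5 expansion builds a 5x4 table: five memory sizes (1, 2, 4, 8 and
// 16 bytes) by four memory orderings (relaxed, acquire, release, acq_rel),
// which getOutlineAtomicHelper indexes by size and ordering.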
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
    return RTLIB::UNKNOWN_LIBCALL;
  unsigned Opc = MI.getOpcode();

  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();

    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();

  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();

    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);

  if (RTLibcallImpl == RTLIB::Unsupported) {

  Info.CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
static RTLIB::Libcall
getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
  RTLIB::Libcall RTLibcall;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_GET_FPENV:
    RTLibcall = RTLIB::FEGETENV;
    break;
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_RESET_FPENV:
    RTLibcall = RTLIB::FESETENV;
    break;
  case TargetOpcode::G_GET_FPMODE:
    RTLibcall = RTLIB::FEGETMODE;
    break;
  case TargetOpcode::G_SET_FPMODE:
  case TargetOpcode::G_RESET_FPMODE:
    RTLibcall = RTLIB::FESETMODE;
    break;
  LLT StateTy = MRI.getType(Dst);

  MachinePointerInfo TempPtrInfo;

  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
      CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,

  MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);

  LLT StateTy = MRI.getType(Src);

  MachinePointerInfo TempPtrInfo;

  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
      CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
      nullptr);
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)                                 \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32: return {RTLIB::LibcallPrefix##32, ICmpPred};                      \
    case 64: return {RTLIB::LibcallPrefix##64, ICmpPred};                      \
    case 128: return {RTLIB::LibcallPrefix##128, ICmpPred};                    \
    default: llvm_unreachable("unexpected size");                              \
    }                                                                          \
  } while (0)
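
// Predicates that map directly onto a soft-float comparison libcall are
// handled by BuildLibcall below; the remaining ones are synthesized from
// combinations (e.g. ordered-equal plus unordered) or by inverting the
// complementary predicate's result.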
  LLT OpLLT = MRI.getType(Cmp->getLHSReg());
      OpLLT != MRI.getType(Cmp->getRHSReg()))

  LLT DstTy = MRI.getType(DstReg);
  const auto Cond = Cmp->getCond();

  const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
    Register Temp = MRI.createGenericVirtualRegister(TempLLT);
        {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
        .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))

      Libcall != RTLIB::UNKNOWN_LIBCALL &&
    if (BuildLibcall(Libcall, ICmpPred, DstReg)) {

    const auto [OeqLibcall, OeqPred] =
    const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);

    const auto [UnoLibcall, UnoPred] =
    const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);

    const auto [OeqLibcall, OeqPred] =
    const auto [UnoLibcall, UnoPred] =
    if (NotOeq && NotUno)

    const auto [InversedLibcall, InversedPred] =
    if (!BuildLibcall(InversedLibcall,
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);

  DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
      CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &MI);
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINIMUMNUM:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
  case TargetOpcode::G_FSINCOS: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");

  case TargetOpcode::G_FMODF: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
        Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
        {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
    MI.eraseFromParent();
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP: {
    LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
        Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
        {MI.getOperand(1).getReg(), HLTy, 0},
        {MI.getOperand(2).getReg(), ITy, 1}};
    Args[1].Flags[0].setSExt();
        Libcall, {MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &MI);
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    if (!FromTy || !ToTy)

  case TargetOpcode::G_FCMP: {
    MI.eraseFromParent();

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
                              FromTy, LocObserver);

  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
    bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {

  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    MI.eraseFromParent();

  case TargetOpcode::G_GET_FPENV:
  case TargetOpcode::G_GET_FPMODE: {

  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_SET_FPMODE: {

  case TargetOpcode::G_RESET_FPENV:
  case TargetOpcode::G_RESET_FPMODE: {
    MI.eraseFromParent();
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

  switch (MI.getOpcode()) {
  case TargetOpcode::G_IMPLICIT_DEF: {
    LLT DstTy = MRI.getType(DstReg);

    if (SizeOp0 % NarrowSize != 0) {
      MI.eraseFromParent();

    int NumParts = SizeOp0 / NarrowSize;

    for (int i = 0; i < NumParts; ++i)

      MIRBuilder.buildBuildVector(DstReg, DstRegs);
      MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
    MI.eraseFromParent();
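
  // A wide G_CONSTANT is narrowed by emitting one NarrowTy constant per slice
  // of the original APInt value, plus a truncated constant for any leftover
  // bits, and then merging the pieces back together.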
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;

    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    if (LeftoverBits != 0) {
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_TRUNC: {
    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
  case TargetOpcode::G_FREEZE: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
          MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
    MI.eraseFromParent();
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_EXTRACT:
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_LOAD: {
    Register DstReg = LoadMI.getDstReg();
    LLT DstTy = MRI.getType(DstReg);

    if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
      LoadMI.eraseFromParent();
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = LoadMI.getMMO();

    if (MemSize == NarrowSize) {
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {

    LoadMI.eraseFromParent();
  case TargetOpcode::G_STORE: {
    Register SrcReg = StoreMI.getValueReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())

    int NumParts = SizeOp0 / NarrowSize;
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)

    if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
      StoreMI.eraseFromParent();
  case TargetOpcode::G_SELECT:

  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {

  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:

  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLS:
  case TargetOpcode::G_CTPOP:
    switch (MI.getOpcode()) {
    case TargetOpcode::G_CTLZ:
    case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    case TargetOpcode::G_CTTZ:
    case TargetOpcode::G_CTTZ_ZERO_UNDEF:
    case TargetOpcode::G_CTPOP:
    case TargetOpcode::G_CTLS:
  case TargetOpcode::G_INTTOPTR:

  case TargetOpcode::G_PTRTOINT:

  case TargetOpcode::G_PHI: {
    if (SizeOp0 % NarrowSize != 0)

    unsigned NumParts = SizeOp0 / NarrowSize;

    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {

    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
    MI.eraseFromParent();

  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
  case TargetOpcode::G_ICMP: {
    LLT SrcTy = MRI.getType(LHS);

    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))

    LLT ResTy = MRI.getType(Dst);

      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);

      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);

      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            TargetOpcode::G_ZEXT);
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
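
      // For the remaining predicates, compare the parts from most to least
      // significant: each step selects between the carried-in result (when
      // the parts are equal) and the comparison of the current part.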
      for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
        if (I == E - 1 && LHSLeftoverRegs.empty()) {
          CmpOut = MRI.createGenericVirtualRegister(ResTy);
          MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
                               RHSPartRegs[I]);
          auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
                                          RHSPartRegs[I]);
          auto CmpEq = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy,
                                            LHSPartRegs[I], RHSPartRegs[I]);
          MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);

      for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
          CmpOut = MRI.createGenericVirtualRegister(ResTy);
          MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
                               RHSLeftoverRegs[I]);
          auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
                                          RHSLeftoverRegs[I]);
          auto CmpEq = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, ResTy,
                                            LHSLeftoverRegs[I],
                                            RHSLeftoverRegs[I]);
          MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);

    MI.eraseFromParent();
  case TargetOpcode::G_FCMP:

  case TargetOpcode::G_SEXT_INREG: {
    int64_t SizeInBits = MI.getOperand(2).getImm();

      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
    if (SizeOp0 % NarrowSize != 0)
    int NumParts = SizeOp0 / NarrowSize;

    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

    for (int i = 0; i < NumParts; ++i) {
        PartialExtensionReg = DstRegs.back();
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
        FullExtensionReg = DstRegs.back();
                TargetOpcode::G_SEXT_INREG, {NarrowTy},
        PartialExtensionReg = DstRegs.back();

    MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
    MI.eraseFromParent();
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)

    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,

    // Process the parts in reverse order so the narrowed pieces land in
    // swapped positions in the merged result.
    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
    MI.eraseFromParent();
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {

  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FPTOSI_SAT:

  case TargetOpcode::G_FPEXT:

  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP:

  case TargetOpcode::G_VSCALE: {
    LLT Ty = MRI.getType(Dst);

    auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
    auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
    auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());

    MI.eraseFromParent();
  LLT Ty = MRI.getType(Val);

  if (Ty.isPointer()) {
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
    return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);

  if (Ty.isPointerVector())
    NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
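
// The widenScalarSrc/widenScalarDst family below rewrites one operand at a
// time: a source operand is replaced by an extension of itself to the wide
// type, while a definition is replaced by a wide register whose value is
// truncated back into the original destination.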
                                     unsigned OpIdx, unsigned ExtOpcode) {
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));

  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));

                                     unsigned OpIdx, unsigned TruncOpcode) {
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});

                                     unsigned OpIdx, unsigned ExtOpcode) {
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});

  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);

  MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));

  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
  if (DstTy.isVector())

  LLT SrcTy = MRI.getType(Src1Reg);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;
          MRI.createGenericVirtualRegister(WideTy);
      ResultReg = NextResult;

    if (WideSize > DstSize)
    else if (DstTy.isPointer())

    MI.eraseFromParent();
  const int GCD = std::gcd(SrcSize, WideSize);

  if (GCD == SrcSize) {

    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)

  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)

  const int PartsPerGCD = WideSize / GCD;

  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
        MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));

    MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);

    auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
    LLVM_DEBUG(
        dbgs() << "Not casting non-integral address space integer\n");

    SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);

    SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);

    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);

    MI.eraseFromParent();
    LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  const int NumUnmerge = Unmerge->getNumOperands() - 1;

  if (PartsPerRemerge == 1) {
    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
          MIB.addDef(MI.getOperand(Idx).getReg());
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

      MIB.addUse(Unmerge.getReg(I));
  } else {
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;

      MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();

  if (SrcTy.isVector() || DstTy.isVector())

      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);

    if (DstTy.isPointer())

    MI.eraseFromParent();

  LLT ShiftTy = SrcTy;

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  std::optional<Register> CarryIn;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});

    LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
               .buildInstr(Opcode, {WideTy, CarryOutTy},
                           {LHSExt, RHSExt, *CarryIn})
  } else {
    NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);

  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});

  MI.eraseFromParent();
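
// Saturating add/sub/shift are widened by shifting both operands into the
// high bits of the wide type, performing the saturating operation there (so
// its clamping point lines up with the wide type's limits), and shifting the
// result back down, arithmetically for the signed forms.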
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;

  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();

  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);

  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());
                       : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);

  MI.eraseFromParent();
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;

  LLT SrcTy = MRI.getType(LHS);
  LLT OverflowTy = MRI.getType(OriginalOverflow);

  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});

      WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;

  MachineInstrBuilder Mulo;
  if (WideMulCanOverflow)
    Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
                                 {LeftOperand, RightOperand});
  else
    Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});

  MachineInstrBuilder ExtResult;
    ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
    ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);

  if (WideMulCanOverflow) {

  MI.eraseFromParent();
  unsigned Opcode = MI.getOpcode();

  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLS:
  case TargetOpcode::G_CTPOP: {
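    // Widening changes the bit positions these opcodes count over, so each
    // needs a correction: cttz gets the top bit of the widened source set so
    // the count cannot run past the original width, while ctlz and ctls
    // subtract the width difference from the wide result.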
    case TargetOpcode::G_CTTZ:
    case TargetOpcode::G_CTTZ_ZERO_UNDEF:
      ExtOpc = TargetOpcode::G_ANYEXT;
      break;
    case TargetOpcode::G_CTLS:
      ExtOpc = TargetOpcode::G_SEXT;
      break;
      ExtOpc = TargetOpcode::G_ZEXT;

    auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
    LLT CurTy = MRI.getType(SrcReg);
    unsigned NewOpc = Opcode;
    if (NewOpc == TargetOpcode::G_CTTZ) {
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;

    if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {

    auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});

    if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
  case TargetOpcode::G_BSWAP: {
    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    MI.getOperand(0).setReg(DstExt);

    LLT Ty = MRI.getType(DstReg);

    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

  case TargetOpcode::G_BITREVERSE: {
    LLT Ty = MRI.getType(DstReg);

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    MI.getOperand(0).setReg(DstExt);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:

  case TargetOpcode::G_ABS:

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SHUFFLE_VECTOR:

  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:

  case TargetOpcode::G_SHL:

  case TargetOpcode::G_ROTR:
  case TargetOpcode::G_ROTL:

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_ABDS:

  case TargetOpcode::G_SDIVREM:

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
                                                    : TargetOpcode::G_ZEXT;

  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_ABDU:

  case TargetOpcode::G_UDIVREM:

  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());

    auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
                             ? TargetOpcode::G_SEXT
                             : TargetOpcode::G_ZEXT;

  case TargetOpcode::G_SELECT:
    bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
  case TargetOpcode::G_FPEXT:

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  case TargetOpcode::G_IS_FPCLASS:

  case TargetOpcode::G_SITOFP:

  case TargetOpcode::G_UITOFP:

  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
    LLT Ty = MRI.getType(OldDst);
    Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
    MI.getOperand(0).setReg(ExtReg);
    uint64_t ShortBits = Ty.getScalarSizeInBits();

    // Re-clamp the widened result to the original saturation range.
    if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
          MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
      NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
      NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);

  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_STORE: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
    if (!Ty.isScalar()) {
      MI.setMemRefs(MF, {NewMMO});

    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? TargetOpcode::G_ZEXT
                                                     : TargetOpcode::G_ANYEXT;

  case TargetOpcode::G_CONSTANT: {
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

  case TargetOpcode::G_FCONSTANT: {
    auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
    MI.eraseFromParent();
  case TargetOpcode::G_IMPLICIT_DEF: {

  case TargetOpcode::G_BRCOND:

  case TargetOpcode::G_FCMP:

  case TargetOpcode::G_ICMP:
    LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());

    auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
    unsigned ExtOpcode =
                             ? TargetOpcode::G_SEXT
                             : TargetOpcode::G_ZEXT;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    LLT VecTy = MRI.getType(VecReg);
                   TargetOpcode::G_ANYEXT);

  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    LLT VecTy = MRI.getType(VecReg);
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUMNUM:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:

    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
  case TargetOpcode::G_FMODF: {

  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    if (Opcode == TargetOpcode::G_STRICT_FLDEXP)

  case TargetOpcode::G_FFREXP: {

  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND:
  case TargetOpcode::G_INTTOPTR:

  case TargetOpcode::G_PTRTOINT:

  case TargetOpcode::G_BUILD_VECTOR: {
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)

      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));

  case TargetOpcode::G_SEXT_INREG:

  case TargetOpcode::G_PTRMASK: {

  case TargetOpcode::G_VECREDUCE_ADD: {

  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
    LLT VecTy = MRI.getType(VecReg);

  case TargetOpcode::G_VSCALE: {
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

  case TargetOpcode::G_SPLAT_VECTOR: {

  case TargetOpcode::G_INSERT_SUBVECTOR: {
    LLT SubVecTy = MRI.getType(SubVec);

    auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
    auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
    auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,

    auto SplatZero = MIRBuilder.buildSplatVector(
    MI.eraseFromParent();
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)

  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();

  LLT DstLLT = MRI.getType(DstReg);

  MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);

  MI.eraseFromParent();
  MI.eraseFromParent();

  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy.isVector()) {

  if (DstTy.isVector()) {
    int NumDstElt = DstTy.getNumElements();
    int NumSrcElt = SrcTy.getNumElements();

    LLT DstCastTy = DstEltTy;
    LLT SrcPartTy = SrcEltTy;

    if (NumSrcElt < NumDstElt) {
      SrcPartTy = SrcEltTy;
    } else if (NumSrcElt > NumDstElt) {
      DstCastTy = DstEltTy;

      SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);

    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();

  if (DstTy.isVector()) {
    MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
    MI.eraseFromParent();
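
// Helper for bitcasting to a type with wider elements: the bit offset of the
// original element inside the wider element is (Idx % Ratio) * OldEltSize,
// computed here with an AND against Ratio - 1 followed by a left shift.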
                                        unsigned NewEltSize,
                                        unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  auto OffsetMask = B.buildConstant(
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();

  unsigned OldNumElts = SrcVecTy.getNumElements();

  if (NewNumElts > OldNumElts) {
    if (NewNumElts % OldNumElts != 0)

    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);

    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MI.eraseFromParent();
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                   ScaledIdx).getReg(0);

    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);

    MI.eraseFromParent();
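
// Insert a value into a wider element with the usual read-modify-write bit
// arithmetic: zero-extend and shift the new value into position, mask the old
// bits out of the target, and OR the two together.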
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  auto EltMask = B.buildConstant(

  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                        ScaledIdx).getReg(0);

    InsertedElt = MIRBuilder.buildInsertVectorElement(
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);

    MI.eraseFromParent();
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
    return UnableToLegalize;

  for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
        MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))

  MI.eraseFromParent();

  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));

  auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
  auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
      MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
  MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);

  MI.eraseFromParent();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  if (DstTy == CastTy)

  if (CastEltSize < DstEltSize)

  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      SrcTyMinElts % AdjustAmt != 0)

  auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
  auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);

  ES->eraseFromParent();
  LLT DstTy = MRI.getType(Dst);
  LLT BigVecTy = MRI.getType(BigVec);
  LLT SubVecTy = MRI.getType(SubVec);

  if (DstTy == CastTy)

  if (CastEltSize < DstEltSize)

  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)

  auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
  auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
      MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);

  ES->eraseFromParent();
  LLT DstTy = MRI.getType(DstReg);

  if (MemSizeInBits != MemStoreSizeInBits) {
    LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
      auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
      MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
      MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);

  if (DstTy != LoadTy)

  if (MIRBuilder.getDataLayout().isBigEndian())
  uint64_t LargeSplitSize, SmallSplitSize;
    SmallSplitSize = MemSizeInBits - LargeSplitSize;

    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;

  if (Alignment.value() * 8 > MemSizeInBits &&

    auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
  LLT PtrTy = MRI.getType(PtrReg);

  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
                                             PtrReg, *LargeMMO);

  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
                                            LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);

  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);

  if (AnyExtTy == DstTy)
    MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
    auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
  LLT SrcTy = MRI.getType(SrcReg);

  if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
    if (StoreSizeInBits > SrcTy.getSizeInBits()) {
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);

    auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
    MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
  uint64_t LargeSplitSize, SmallSplitSize;

    if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))

    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;

  if (SrcTy.isPointer()) {
    SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);

  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);

  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);

  LLT PtrTy = MRI.getType(PtrReg);
  auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);

  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
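
// Stores of vectors whose elements are not byte-sized are lowered by packing
// every element into a single integer: each element is truncated to the
// in-memory scalar type, zero-extended, shifted to its bit position, and
// ORed into an accumulator that is stored once.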
  LLT SrcTy = MRI.getType(SrcReg);

  assert(SrcTy.isVector() && "Expect a vector store type");

  auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);

    auto Elt = MIRBuilder.buildExtractVectorElement(
        SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
    auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
    auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);

    auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
    CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);

  MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {

  case TargetOpcode::G_STORE: {

  case TargetOpcode::G_SELECT: {
    if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");

  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {

  case TargetOpcode::G_EXTRACT_VECTOR_ELT:

  case TargetOpcode::G_INSERT_VECTOR_ELT:

  case TargetOpcode::G_CONCAT_VECTORS:

  case TargetOpcode::G_SHUFFLE_VECTOR:

  case TargetOpcode::G_EXTRACT_SUBVECTOR:

  case TargetOpcode::G_INSERT_SUBVECTOR:
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {

  switch (MI.getOpcode()) {
  case TargetOpcode::G_FCONSTANT:

  case TargetOpcode::G_BITCAST:

  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // Lower the remainder via the identity x % y == x - (x / y) * y.
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
        MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
                              {MI.getOperand(1), MI.getOperand(2)});

    auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));

    MI.eraseFromParent();
4545 case TargetOpcode::G_SADDO:
4546 case TargetOpcode::G_SSUBO:
4548 case TargetOpcode::G_SADDE:
4550 case TargetOpcode::G_SSUBE:
4552 case TargetOpcode::G_UMULH:
4553 case TargetOpcode::G_SMULH:
4555 case TargetOpcode::G_SMULO:
4556 case TargetOpcode::G_UMULO: {
4559 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4560 LLT Ty = MRI.getType(Res);
4562 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4563 ? TargetOpcode::G_SMULH
4564 : TargetOpcode::G_UMULH;
4568 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4569 MI.removeOperand(1);
4572 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4580 if (Opcode == TargetOpcode::G_SMULH) {
4581 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4582 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4589 case TargetOpcode::G_FNEG: {
4590 auto [Res, SubByReg] =
MI.getFirst2Regs();
4591 LLT Ty = MRI.getType(Res);
4595 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4596 MI.eraseFromParent();
4599 case TargetOpcode::G_FSUB:
4600 case TargetOpcode::G_STRICT_FSUB: {
4601 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4602 LLT Ty = MRI.getType(Res);
4607 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4608 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4612 MI.eraseFromParent();
4615 case TargetOpcode::G_FMAD:
4617 case TargetOpcode::G_FFLOOR:
4619 case TargetOpcode::G_LROUND:
4620 case TargetOpcode::G_LLROUND: {
4623 LLT SrcTy = MRI.getType(SrcReg);
4624 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4627 MI.eraseFromParent();
4630 case TargetOpcode::G_INTRINSIC_ROUND:
4632 case TargetOpcode::G_FRINT: {
4635 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4638 case TargetOpcode::G_INTRINSIC_LRINT:
4639 case TargetOpcode::G_INTRINSIC_LLRINT: {
4642 LLT SrcTy = MRI.getType(SrcReg);
4644 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4646 MI.eraseFromParent();
4649 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4650 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4651 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4652 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4653 **
MI.memoperands_begin());
4655 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4656 MI.eraseFromParent();
4659 case TargetOpcode::G_LOAD:
4660 case TargetOpcode::G_SEXTLOAD:
4661 case TargetOpcode::G_ZEXTLOAD:
4663 case TargetOpcode::G_STORE:
4665 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4666 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4667 case TargetOpcode::G_CTLZ:
4668 case TargetOpcode::G_CTTZ:
4669 case TargetOpcode::G_CTPOP:
4670 case TargetOpcode::G_CTLS:
4673 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4675 Register NewRes = MRI.cloneVirtualRegister(Res);
4682 MI.eraseFromParent();
4686 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4687 const LLT CondTy = MRI.getType(CarryOut);
4688 const LLT Ty = MRI.getType(Res);
4690 Register NewRes = MRI.cloneVirtualRegister(Res);
4693 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4699 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4700 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4707 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4712 MI.eraseFromParent();
4716 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4721 MI.eraseFromParent();
4725 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4726 const LLT CondTy = MRI.getType(BorrowOut);
4727 const LLT Ty = MRI.getType(Res);
4730 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4736 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4737 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4744 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4745 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4747 MI.eraseFromParent();
4783 case G_MERGE_VALUES:
4785 case G_UNMERGE_VALUES:
4787 case TargetOpcode::G_SEXT_INREG: {
4788 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4789 int64_t SizeInBits =
MI.getOperand(2).getImm();
4791 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4792 LLT DstTy = MRI.getType(DstReg);
4793 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4796 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4797 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4798 MI.eraseFromParent();
4801 case G_EXTRACT_VECTOR_ELT:
4802 case G_INSERT_VECTOR_ELT:
4804 case G_SHUFFLE_VECTOR:
4806 case G_VECTOR_COMPRESS:
4808 case G_DYN_STACKALLOC:
4812 case G_STACKRESTORE:
4822 case G_READ_REGISTER:
4823 case G_WRITE_REGISTER:
4830 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4831 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4837 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4842 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4853 bool IsSigned =
MI.getOpcode() == G_ABDS;
4854 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4855 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4856 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4880 case G_MEMCPY_INLINE:
4881 return lowerMemcpyInline(
MI);
4892 case G_ATOMICRMW_SUB: {
4893 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
4894 const LLT ValTy = MRI.getType(Val);
4898 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4899 MI.eraseFromParent();
4922 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4926 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4932 Align StackTypeAlign =
4939 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4940 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4945 LLT IdxTy =
B.getMRI()->getType(IdxReg);
4957 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
4960 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
4971 "Converting bits to bytes lost precision");
4977 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4978 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
4980 if (IdxTy != MRI.getType(Index))
4981 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4986 LLT PtrTy = MRI.getType(VecPtr);
4987 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
4995 std::initializer_list<unsigned> NonVecOpIndices) {
4996 if (
MI.getNumMemOperands() != 0)
4999 LLT VecTy =
MRI.getType(
MI.getReg(0));
5013 if (!Ty.isVector()) {
5019 if (Ty.getNumElements() != NumElts)
5034 assert(Ty.isVector() &&
"Expected vector type");
5036 int NumParts, NumLeftover;
5037 std::tie(NumParts, NumLeftover) =
5040 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5041 for (
int i = 0; i < NumParts; ++i) {
5046 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5055 for (
unsigned i = 0; i <
N; ++i) {
5057 Ops.push_back(
Op.getReg());
5058 else if (
Op.isImm())
5059 Ops.push_back(
Op.getImm());
5060 else if (
Op.isPredicate())
5082 std::initializer_list<unsigned> NonVecOpIndices) {
5084 "Non-compatible opcode or not specified non-vector operands");
5085 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5087 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5088 unsigned NumDefs =
MI.getNumDefs();
5096 for (
unsigned i = 0; i < NumDefs; ++i) {
5097 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5105 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5106 ++UseIdx, ++UseNo) {
5109 MI.getOperand(UseIdx));
5118 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5122 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5124 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5125 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5128 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5129 Uses.push_back(InputOpsPieces[InputNo][i]);
5132 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5133 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5138 for (
unsigned i = 0; i < NumDefs; ++i)
5139 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5141 for (
unsigned i = 0; i < NumDefs; ++i)
5142 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5145 MI.eraseFromParent();
5152 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5154 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5155 unsigned NumDefs =
MI.getNumDefs();
5159 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5164 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5165 UseIdx += 2, ++UseNo) {
5173 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5175 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5176 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5178 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5181 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5182 Phi.addUse(InputOpsPieces[j][i]);
5183 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5193 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5195 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5198 MI.eraseFromParent();
5206 const int NumDst =
MI.getNumOperands() - 1;
5207 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5208 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5209 LLT SrcTy = MRI.getType(SrcReg);
5211 if (TypeIdx != 1 || NarrowTy == DstTy)
5218 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5221 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5235 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5236 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5237 const int PartsPerUnmerge = NumDst / NumUnmerge;
5239 for (
int I = 0;
I != NumUnmerge; ++
I) {
5240 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5242 for (
int J = 0; J != PartsPerUnmerge; ++J)
5243 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5244 MIB.addUse(Unmerge.getReg(
I));
5247 MI.eraseFromParent();
5254 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5258 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5260 if (NarrowTy == SrcTy)
5268 assert(SrcTy.isVector() &&
"Expected vector types");
5270 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5284 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5285 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5286 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5292 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5293 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5294 ++i,
Offset += NumNarrowTyElts) {
5297 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5300 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5301 MI.eraseFromParent();
5305 assert(TypeIdx == 0 &&
"Bad type index");
5306 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5321 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5322 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5324 for (
unsigned i = 0; i < NumParts; ++i) {
5326 for (
unsigned j = 0; j < NumElts; ++j)
5327 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5329 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5332 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5333 MI.eraseFromParent();
5341 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5343 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5345 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5347 InsertVal =
MI.getOperand(2).getReg();
5349 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5350 LLT VecTy = MRI.getType(SrcVec);
5356 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5360 MI.eraseFromParent();
5369 SplitPieces[IdxVal] = InsertVal;
5370 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5372 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5376 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5379 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5380 TargetOpcode::G_ANYEXT);
5384 LLT IdxTy = MRI.getType(Idx);
5385 int64_t PartIdx = IdxVal / NewNumElts;
5387 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5390 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5393 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5394 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5395 VecParts[PartIdx] = InsertPart.getReg(0);
5399 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5401 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5405 MI.eraseFromParent();
5425 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5437 LLT ValTy = MRI.getType(ValReg);
5446 int NumLeftover = -1;
5452 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5454 NumParts = NarrowRegs.
size();
5455 NumLeftover = NarrowLeftoverRegs.
size();
5462 LLT PtrTy = MRI.getType(AddrReg);
5465 unsigned TotalSize = ValTy.getSizeInBits();
5472 auto MMO = LdStMI.
getMMO();
5474 unsigned NumParts,
unsigned Offset) ->
unsigned {
5477 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5479 unsigned ByteOffset =
Offset / 8;
5482 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5489 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5490 ValRegs.push_back(Dst);
5491 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5493 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5502 unsigned HandledOffset =
5503 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5507 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5510 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5511 LeftoverTy, NarrowLeftoverRegs);
5525 switch (
MI.getOpcode()) {
5526 case G_IMPLICIT_DEF:
5542 case G_FCANONICALIZE:
5559 case G_INTRINSIC_LRINT:
5560 case G_INTRINSIC_LLRINT:
5561 case G_INTRINSIC_ROUND:
5562 case G_INTRINSIC_ROUNDEVEN:
5565 case G_INTRINSIC_TRUNC:
5593 case G_FMINNUM_IEEE:
5594 case G_FMAXNUM_IEEE:
5616 case G_CTLZ_ZERO_UNDEF:
5618 case G_CTTZ_ZERO_UNDEF:
5634 case G_ADDRSPACE_CAST:
5647 case G_STRICT_FLDEXP:
5656 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5661 case G_UNMERGE_VALUES:
5663 case G_BUILD_VECTOR:
5664 assert(TypeIdx == 0 &&
"not a vector type index");
5666 case G_CONCAT_VECTORS:
5670 case G_EXTRACT_VECTOR_ELT:
5671 case G_INSERT_VECTOR_ELT:
5680 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5681 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5683 case G_SHUFFLE_VECTOR:
5689 case G_INTRINSIC_FPTRUNC_ROUND:
5699 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5700 "Not a bitcast operation");
5705 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5707 unsigned NewElemCount =
5710 if (NewElemCount == 1) {
5713 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5720 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5729 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5730 MI.eraseFromParent();
5736 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5740 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5741 MI.getFirst3RegLLTs();
5744 if (DstTy != Src1Ty)
5746 if (DstTy != Src2Ty)
5761 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5777 unsigned InputUsed[2] = {-1U, -1U};
5778 unsigned FirstMaskIdx =
High * NewElts;
5779 bool UseBuildVector =
false;
5780 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5782 int Idx = Mask[FirstMaskIdx + MaskOffset];
5787 if (
Input >= std::size(Inputs)) {
5794 Idx -=
Input * NewElts;
5798 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5799 if (InputUsed[OpNo] ==
Input) {
5802 }
else if (InputUsed[OpNo] == -1U) {
5804 InputUsed[OpNo] =
Input;
5809 if (OpNo >= std::size(InputUsed)) {
5812 UseBuildVector =
true;
5817 Ops.push_back(Idx + OpNo * NewElts);
5820 if (UseBuildVector) {
5825 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5827 int Idx = Mask[FirstMaskIdx + MaskOffset];
5832 if (
Input >= std::size(Inputs)) {
5839 Idx -=
Input * NewElts;
5843 .buildExtractVectorElement(
5844 EltTy, Inputs[
Input],
5850 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5851 }
else if (InputUsed[0] == -1U) {
5853 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5854 }
else if (NewElts == 1) {
5855 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5857 Register Op0 = Inputs[InputUsed[0]];
5861 : Inputs[InputUsed[1]];
5863 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5870 MI.eraseFromParent();
5883 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5889 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5892 const unsigned NumParts =
5894 : SrcTy.getNumElements();
5898 if (DstTy != NarrowTy)
5904 unsigned NumPartsLeft = NumParts;
5905 while (NumPartsLeft > 1) {
5906 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5909 .buildInstr(ScalarOpc, {NarrowTy},
5910 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5913 SplitSrcs = PartialResults;
5914 PartialResults.
clear();
5915 NumPartsLeft = SplitSrcs.
size();
5919 MI.eraseFromParent();
5924 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
5925 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5928 MI.eraseFromParent();
5932 for (
unsigned Part = 0; Part < NumParts; ++Part) {
5934 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5942 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5945 Register Acc = PartialReductions[0];
5946 for (
unsigned Part = 1; Part < NumParts; ++Part) {
5947 if (Part == NumParts - 1) {
5949 {Acc, PartialReductions[Part]});
5952 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5956 MI.eraseFromParent();
5962 unsigned int TypeIdx,
5964 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5965 MI.getFirst3RegLLTs();
5966 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5970 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5971 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5972 "Unexpected vecreduce opcode");
5973 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5974 ? TargetOpcode::G_FADD
5975 : TargetOpcode::G_FMUL;
5978 unsigned NumParts = SrcTy.getNumElements();
5981 for (
unsigned i = 0; i < NumParts; i++)
5982 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5986 MI.eraseFromParent();
5993 unsigned ScalarOpc) {
6001 while (SplitSrcs.
size() > 1) {
6003 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6011 SplitSrcs = std::move(PartialRdxs);
6015 MI.getOperand(1).setReg(SplitSrcs[0]);
6022 const LLT HalfTy,
const LLT AmtTy) {
6024 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6025 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6029 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6030 MI.eraseFromParent();
6036 unsigned VTBits = 2 * NVTBits;
6039 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6040 if (Amt.
ugt(VTBits)) {
6042 }
else if (Amt.
ugt(NVTBits)) {
6045 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6046 }
else if (Amt == NVTBits) {
6054 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6057 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6058 if (Amt.
ugt(VTBits)) {
6060 }
else if (Amt.
ugt(NVTBits)) {
6062 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6064 }
else if (Amt == NVTBits) {
6068 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6070 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6072 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6078 if (Amt.
ugt(VTBits)) {
6080 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6081 }
else if (Amt.
ugt(NVTBits)) {
6083 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6085 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6086 }
else if (Amt == NVTBits) {
6089 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6091 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6093 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6095 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6102 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6103 MI.eraseFromParent();
6119 LLT DstTy = MRI.getType(DstReg);
6124 LLT ShiftAmtTy = MRI.getType(Amt);
6126 if (DstEltSize % 2 != 0)
6142 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6153 const unsigned NewBitSize = DstEltSize / 2;
6165 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6167 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6168 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6171 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6172 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6174 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6179 switch (
MI.getOpcode()) {
6180 case TargetOpcode::G_SHL: {
6182 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6184 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6185 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6186 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6189 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6190 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6192 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6194 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6196 ResultRegs[0] =
Lo.getReg(0);
6197 ResultRegs[1] =
Hi.getReg(0);
6200 case TargetOpcode::G_LSHR:
6201 case TargetOpcode::G_ASHR: {
6203 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6205 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6206 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6207 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6211 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6214 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6215 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6217 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6221 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6223 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6225 ResultRegs[0] =
Lo.getReg(0);
6226 ResultRegs[1] =
Hi.getReg(0);
6233 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6234 MI.eraseFromParent();
6243 LLT TargetTy,
LLT ShiftAmtTy) {
6246 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6248 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6249 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6250 const bool NeedsInterWordShift = ShiftBits != 0;
6253 case TargetOpcode::G_SHL: {
6256 if (PartIdx < ShiftWords)
6259 unsigned SrcIdx = PartIdx - ShiftWords;
6260 if (!NeedsInterWordShift)
6261 return SrcParts[SrcIdx];
6266 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6270 return Hi.getReg(0);
6273 case TargetOpcode::G_LSHR: {
6274 unsigned SrcIdx = PartIdx + ShiftWords;
6275 if (SrcIdx >= NumParts)
6277 if (!NeedsInterWordShift)
6278 return SrcParts[SrcIdx];
6282 if (SrcIdx + 1 < NumParts) {
6283 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6287 return Lo.getReg(0);
6290 case TargetOpcode::G_ASHR: {
6292 unsigned SrcIdx = PartIdx + ShiftWords;
6293 if (SrcIdx >= NumParts)
6295 if (!NeedsInterWordShift)
6296 return SrcParts[SrcIdx];
6301 (SrcIdx == NumParts - 1)
6305 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6327 unsigned MainOpcode =
6328 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6332 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6341 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6342 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6344 auto IsZeroBitShift =
6352 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6353 : TargetOpcode::G_SHL;
6356 auto TargetBitsConst =
6358 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6363 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6368 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6370 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6374 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6387 LLT DstTy = MRI.getType(DstReg);
6391 const unsigned NumParts = DstBits / TargetBits;
6393 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6403 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6404 MI.eraseFromParent();
6409 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6410 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6416 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6420 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6423 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6424 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6428 for (
unsigned I = 0;
I < NumParts; ++
I)
6430 Params, TargetTy, ShiftAmtTy);
6432 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6433 MI.eraseFromParent();
6442 LLT DstTy = MRI.getType(DstReg);
6443 LLT ShiftAmtTy = MRI.getType(AmtReg);
6447 const unsigned NumParts = DstBits / TargetBits;
6449 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6466 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6478 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6479 auto TargetBitsLog2Const =
6480 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6481 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6484 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6486 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6494 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6495 auto TargetBitsMinusOneConst =
6496 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6498 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6499 TargetBitsMinusOneConst)
6502 FillValue = ZeroReg;
6510 for (
unsigned I = 0;
I < NumParts; ++
I) {
6512 Register InBoundsResult = FillValue;
6522 for (
unsigned K = 0; K < NumParts; ++K) {
6523 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6525 WordShift, WordShiftKConst);
6537 switch (
MI.getOpcode()) {
6538 case TargetOpcode::G_SHL:
6539 MainSrcIdx = (int)
I - (
int)K;
6540 CarrySrcIdx = MainSrcIdx - 1;
6542 case TargetOpcode::G_LSHR:
6543 case TargetOpcode::G_ASHR:
6544 MainSrcIdx = (int)
I + (
int)K;
6545 CarrySrcIdx = MainSrcIdx + 1;
6553 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6554 Register MainOp = SrcParts[MainSrcIdx];
6558 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6559 CarryOp = SrcParts[CarrySrcIdx];
6560 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6561 CarrySrcIdx >= (
int)NumParts)
6562 CarryOp = FillValue;
6568 ResultForK = FillValue;
6574 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6581 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6585 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6586 MI.eraseFromParent();
6593 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6596 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6611 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6616 "getNeutralElementForVecReduce called with invalid opcode!");
6617 case TargetOpcode::G_VECREDUCE_ADD:
6618 case TargetOpcode::G_VECREDUCE_OR:
6619 case TargetOpcode::G_VECREDUCE_XOR:
6620 case TargetOpcode::G_VECREDUCE_UMAX:
6622 case TargetOpcode::G_VECREDUCE_MUL:
6624 case TargetOpcode::G_VECREDUCE_AND:
6625 case TargetOpcode::G_VECREDUCE_UMIN:
6628 case TargetOpcode::G_VECREDUCE_SMAX:
6631 case TargetOpcode::G_VECREDUCE_SMIN:
6634 case TargetOpcode::G_VECREDUCE_FADD:
6636 case TargetOpcode::G_VECREDUCE_FMUL:
6638 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6639 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6640 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6641 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6649 unsigned Opc =
MI.getOpcode();
6651 case TargetOpcode::G_IMPLICIT_DEF:
6652 case TargetOpcode::G_LOAD: {
6660 case TargetOpcode::G_STORE:
6667 case TargetOpcode::G_AND:
6668 case TargetOpcode::G_OR:
6669 case TargetOpcode::G_XOR:
6670 case TargetOpcode::G_ADD:
6671 case TargetOpcode::G_SUB:
6672 case TargetOpcode::G_MUL:
6673 case TargetOpcode::G_FADD:
6674 case TargetOpcode::G_FSUB:
6675 case TargetOpcode::G_FMUL:
6676 case TargetOpcode::G_FDIV:
6677 case TargetOpcode::G_FCOPYSIGN:
6678 case TargetOpcode::G_UADDSAT:
6679 case TargetOpcode::G_USUBSAT:
6680 case TargetOpcode::G_SADDSAT:
6681 case TargetOpcode::G_SSUBSAT:
6682 case TargetOpcode::G_SMIN:
6683 case TargetOpcode::G_SMAX:
6684 case TargetOpcode::G_UMIN:
6685 case TargetOpcode::G_UMAX:
6686 case TargetOpcode::G_FMINNUM:
6687 case TargetOpcode::G_FMAXNUM:
6688 case TargetOpcode::G_FMINNUM_IEEE:
6689 case TargetOpcode::G_FMAXNUM_IEEE:
6690 case TargetOpcode::G_FMINIMUM:
6691 case TargetOpcode::G_FMAXIMUM:
6692 case TargetOpcode::G_FMINIMUMNUM:
6693 case TargetOpcode::G_FMAXIMUMNUM:
6694 case TargetOpcode::G_STRICT_FADD:
6695 case TargetOpcode::G_STRICT_FSUB:
6696 case TargetOpcode::G_STRICT_FMUL: {
6704 case TargetOpcode::G_SHL:
6705 case TargetOpcode::G_ASHR:
6706 case TargetOpcode::G_LSHR: {
6712 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6718 case TargetOpcode::G_FMA:
6719 case TargetOpcode::G_STRICT_FMA:
6720 case TargetOpcode::G_FSHR:
6721 case TargetOpcode::G_FSHL: {
6730 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6731 case TargetOpcode::G_EXTRACT:
6738 case TargetOpcode::G_INSERT:
6739 case TargetOpcode::G_INSERT_VECTOR_ELT:
6740 case TargetOpcode::G_FREEZE:
6741 case TargetOpcode::G_FNEG:
6742 case TargetOpcode::G_FABS:
6743 case TargetOpcode::G_FSQRT:
6744 case TargetOpcode::G_FCEIL:
6745 case TargetOpcode::G_FFLOOR:
6746 case TargetOpcode::G_FNEARBYINT:
6747 case TargetOpcode::G_FRINT:
6748 case TargetOpcode::G_INTRINSIC_ROUND:
6749 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6750 case TargetOpcode::G_INTRINSIC_TRUNC:
6751 case TargetOpcode::G_BITREVERSE:
6752 case TargetOpcode::G_BSWAP:
6753 case TargetOpcode::G_FCANONICALIZE:
6754 case TargetOpcode::G_SEXT_INREG:
6755 case TargetOpcode::G_ABS:
6756 case TargetOpcode::G_CTLZ:
6757 case TargetOpcode::G_CTPOP:
6765 case TargetOpcode::G_SELECT: {
6766 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6768 if (!CondTy.isScalar() ||
6774 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6776 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6781 if (CondTy.isVector())
6791 case TargetOpcode::G_UNMERGE_VALUES:
6793 case TargetOpcode::G_PHI:
6795 case TargetOpcode::G_SHUFFLE_VECTOR:
6797 case TargetOpcode::G_BUILD_VECTOR: {
6799 for (
auto Op :
MI.uses()) {
6807 MIRBuilder.buildDeleteTrailingVectorElements(
6808 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6809 MI.eraseFromParent();
6812 case TargetOpcode::G_SEXT:
6813 case TargetOpcode::G_ZEXT:
6814 case TargetOpcode::G_ANYEXT:
6815 case TargetOpcode::G_TRUNC:
6816 case TargetOpcode::G_FPTRUNC:
6817 case TargetOpcode::G_FPEXT:
6818 case TargetOpcode::G_FPTOSI:
6819 case TargetOpcode::G_FPTOUI:
6820 case TargetOpcode::G_FPTOSI_SAT:
6821 case TargetOpcode::G_FPTOUI_SAT:
6822 case TargetOpcode::G_SITOFP:
6823 case TargetOpcode::G_UITOFP: {
6830 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6833 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6841 case TargetOpcode::G_ICMP:
6842 case TargetOpcode::G_FCMP: {
6850 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6855 case TargetOpcode::G_BITCAST: {
6859 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6860 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
6876 case TargetOpcode::G_VECREDUCE_FADD:
6877 case TargetOpcode::G_VECREDUCE_FMUL:
6878 case TargetOpcode::G_VECREDUCE_ADD:
6879 case TargetOpcode::G_VECREDUCE_MUL:
6880 case TargetOpcode::G_VECREDUCE_AND:
6881 case TargetOpcode::G_VECREDUCE_OR:
6882 case TargetOpcode::G_VECREDUCE_XOR:
6883 case TargetOpcode::G_VECREDUCE_SMAX:
6884 case TargetOpcode::G_VECREDUCE_SMIN:
6885 case TargetOpcode::G_VECREDUCE_UMAX:
6886 case TargetOpcode::G_VECREDUCE_UMIN: {
6887 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
6889 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6890 auto NeutralElement = getNeutralElementForVecReduce(
6896 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
6897 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6898 NeutralElement, Idx);
6902 MO.
setReg(NewVec.getReg(0));
6914 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6916 unsigned MaskNumElts = Mask.size();
6917 unsigned SrcNumElts = SrcTy.getNumElements();
6920 if (MaskNumElts == SrcNumElts)
6923 if (MaskNumElts < SrcNumElts) {
6931 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
6932 MI.getOperand(1).getReg(),
6933 MI.getOperand(2).getReg(), NewMask);
6934 MI.eraseFromParent();
6939 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
6940 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6949 MOps1[0] =
MI.getOperand(1).getReg();
6950 MOps2[0] =
MI.getOperand(2).getReg();
6952 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6953 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6957 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
6959 if (Idx >=
static_cast<int>(SrcNumElts))
6960 Idx += PaddedMaskNumElts - SrcNumElts;
6965 if (MaskNumElts != PaddedMaskNumElts) {
6967 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6970 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
6972 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
6977 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6980 MI.eraseFromParent();
6986 unsigned int TypeIdx,
LLT MoreTy) {
6987 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
6989 unsigned NumElts = DstTy.getNumElements();
6992 if (DstTy.isVector() && Src1Ty.isVector() &&
6993 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7001 if (DstTy != Src1Ty || DstTy != Src2Ty)
7009 for (
unsigned I = 0;
I != NumElts; ++
I) {
7011 if (Idx <
static_cast<int>(NumElts))
7014 NewMask[
I] = Idx - NumElts + WidenNumElts;
7018 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7019 MI.getOperand(1).getReg(),
7020 MI.getOperand(2).getReg(), NewMask);
7021 MI.eraseFromParent();
7030 unsigned SrcParts = Src1Regs.
size();
7031 unsigned DstParts = DstRegs.
size();
7033 unsigned DstIdx = 0;
7035 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7036 DstRegs[DstIdx] = FactorSum;
7041 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7043 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7044 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7046 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7050 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7051 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7053 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7063 if (DstIdx != DstParts - 1) {
7064 MachineInstrBuilder Uaddo =
7065 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
7066 FactorSum = Uaddo.
getReg(0);
7067 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7068 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7069 MachineInstrBuilder Uaddo =
7070 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
7071 FactorSum = Uaddo.
getReg(0);
7072 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7073 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7077 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7078 for (
unsigned i = 2; i < Factors.
size(); ++i)
7079 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7082 CarrySumPrevDstIdx = CarrySum;
7083 DstRegs[DstIdx] = FactorSum;
7095 LLT DstType = MRI.getType(DstReg);
7097 if (DstType.isVector())
7100 unsigned Opcode =
MI.getOpcode();
7101 unsigned OpO, OpE, OpF;
7103 case TargetOpcode::G_SADDO:
7104 case TargetOpcode::G_SADDE:
7105 case TargetOpcode::G_UADDO:
7106 case TargetOpcode::G_UADDE:
7107 case TargetOpcode::G_ADD:
7108 OpO = TargetOpcode::G_UADDO;
7109 OpE = TargetOpcode::G_UADDE;
7110 OpF = TargetOpcode::G_UADDE;
7111 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7112 OpF = TargetOpcode::G_SADDE;
7114 case TargetOpcode::G_SSUBO:
7115 case TargetOpcode::G_SSUBE:
7116 case TargetOpcode::G_USUBO:
7117 case TargetOpcode::G_USUBE:
7118 case TargetOpcode::G_SUB:
7119 OpO = TargetOpcode::G_USUBO;
7120 OpE = TargetOpcode::G_USUBE;
7121 OpF = TargetOpcode::G_USUBE;
7122 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7123 OpF = TargetOpcode::G_SSUBE;
7130 unsigned NumDefs =
MI.getNumExplicitDefs();
7131 Register Src1 =
MI.getOperand(NumDefs).getReg();
7132 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7135 CarryDst =
MI.getOperand(1).getReg();
7136 if (
MI.getNumOperands() == NumDefs + 3)
7137 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7139 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7140 LLT LeftoverTy, DummyTy;
7142 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7147 int NarrowParts = Src1Regs.
size();
7148 Src1Regs.
append(Src1Left);
7149 Src2Regs.
append(Src2Left);
7152 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7154 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7157 if (i == e - 1 && CarryDst)
7158 CarryOut = CarryDst;
7160 CarryOut = MRI.createGenericVirtualRegister(
LLT::scalar(1));
7163 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7164 {Src1Regs[i], Src2Regs[i]});
7165 }
else if (i == e - 1) {
7166 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7167 {Src1Regs[i], Src2Regs[i], CarryIn});
7169 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7170 {Src1Regs[i], Src2Regs[i], CarryIn});
7176 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7177 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7178 ArrayRef(DstRegs).drop_front(NarrowParts));
7180 MI.eraseFromParent();
7186 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7188 LLT Ty = MRI.getType(DstReg);
7192 unsigned Size = Ty.getSizeInBits();
7194 if (
Size % NarrowSize != 0)
7197 unsigned NumParts =
Size / NarrowSize;
7198 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7199 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7205 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7209 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7210 MI.eraseFromParent();
7220 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7223 LLT SrcTy = MRI.getType(Src);
7234 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7247 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7250 if (SizeOp1 % NarrowSize != 0)
7252 int NumParts = SizeOp1 / NarrowSize;
7255 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7259 uint64_t OpStart =
MI.getOperand(2).getImm();
7260 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7261 for (
int i = 0; i < NumParts; ++i) {
7262 unsigned SrcStart = i * NarrowSize;
7264 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7267 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7275 int64_t ExtractOffset;
7277 if (OpStart < SrcStart) {
7279 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7281 ExtractOffset = OpStart - SrcStart;
7282 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7286 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7288 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7289 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7296 if (MRI.getType(DstReg).isVector())
7297 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7298 else if (DstRegs.
size() > 1)
7299 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7302 MI.eraseFromParent();
7314 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7316 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7319 SrcRegs.
append(LeftoverRegs);
7323 uint64_t OpStart =
MI.getOperand(3).getImm();
7324 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7325 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7326 unsigned DstStart =
I * NarrowSize;
7328 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7336 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7338 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7342 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7350 int64_t ExtractOffset, InsertOffset;
7352 if (OpStart < DstStart) {
7354 ExtractOffset = DstStart - OpStart;
7355 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7357 InsertOffset = OpStart - DstStart;
7360 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7364 if (ExtractOffset != 0 || SegSize != OpSize) {
7366 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7367 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7370 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7371 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7379 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7382 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7384 MI.eraseFromParent();
7392 LLT DstTy = MRI.getType(DstReg);
7394 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7400 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7401 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7405 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7406 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7409 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7410 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7411 {Src0Regs[I], Src1Regs[I]});
7415 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7418 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7419 DstLeftoverRegs.
push_back(Inst.getReg(0));
7422 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7423 LeftoverTy, DstLeftoverRegs);
7425 MI.eraseFromParent();
7435 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7437 LLT DstTy = MRI.getType(DstReg);
7442 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7443 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7444 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7446 MI.eraseFromParent();
7456 Register CondReg =
MI.getOperand(1).getReg();
7457 LLT CondTy = MRI.getType(CondReg);
7462 LLT DstTy = MRI.getType(DstReg);
7468 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7469 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7473 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7474 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7477 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7479 CondReg, Src1Regs[
I], Src2Regs[
I]);
7483 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7485 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7489 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7490 LeftoverTy, DstLeftoverRegs);
7492 MI.eraseFromParent();
7502 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7505 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7506 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7509 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7511 auto C_0 =
B.buildConstant(NarrowTy, 0);
7513 UnmergeSrc.getReg(1), C_0);
7514 auto LoCTLZ = IsUndef ?
7515 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7516 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7517 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7518 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7519 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7520 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7522 MI.eraseFromParent();
7535 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7538 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7539 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7542 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7544 auto C_0 =
B.buildConstant(NarrowTy, 0);
7546 UnmergeSrc.getReg(0), C_0);
7547 auto HiCTTZ = IsUndef ?
7548 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7549 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7550 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7551 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7552 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7553 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7555 MI.eraseFromParent();
7568 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7571 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7576 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7580 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7581 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7583 auto LoSign =
B.buildAShr(NarrowTy,
Lo, ShAmt);
7585 LoSign.getReg(0), Sign.getReg(0));
7590 auto LoCTLS =
B.buildCTLS(DstTy,
Lo);
7591 auto GNarrowSize =
B.buildConstant(DstTy, NarrowSize);
7592 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLS, GNarrowSize);
7596 auto GNarrowSizeMinus1 =
B.buildConstant(DstTy, NarrowSize - 1);
7598 B.buildSelect(DstTy, LoSameSign, HiIsSignCTLS, GNarrowSizeMinus1);
7600 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7602 B.buildSelect(DstReg, HiIsSign, HiSignResult, HiCTLS);
7604 MI.eraseFromParent();
7614 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7617 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7618 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7620 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7621 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7622 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7624 MI.eraseFromParent();
7639 LLT ExpTy = MRI.getType(ExpReg);
7644 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7645 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7646 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7647 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7649 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7651 MI.getOperand(2).setReg(Trunc.getReg(0));
7658 unsigned Opc =
MI.getOpcode();
7661 auto QAction = LI.getAction(Q).Action;
7667 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7670 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7674 case TargetOpcode::G_CTLZ: {
7675 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7676 unsigned Len = SrcTy.getScalarSizeInBits();
7678 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7680 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7681 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7684 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7685 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7686 MI.eraseFromParent();
7702 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7703 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7706 Op = MIBOp.getReg(0);
7711 MI.eraseFromParent();
7714 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7717 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7721 case TargetOpcode::G_CTTZ: {
7722 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7724 unsigned Len = SrcTy.getScalarSizeInBits();
7725 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7728 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7729 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7732 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7733 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7734 MI.eraseFromParent();
7741 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7742 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7744 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7745 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7746 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7747 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7750 MI.eraseFromParent();
7754 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7755 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7759 case TargetOpcode::G_CTPOP: {
7761 LLT Ty = MRI.getType(SrcReg);
7762 unsigned Size = Ty.getScalarSizeInBits();
7774 auto C_1 =
B.buildConstant(Ty, 1);
7775 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7777 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7778 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7779 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7783 auto C_2 =
B.buildConstant(Ty, 2);
7784 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7786 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7787 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7788 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7789 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7796 auto C_4 =
B.buildConstant(Ty, 4);
7797 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7798 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7800 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7801 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7803 assert(
Size<=128 &&
"Scalar size is too large for CTPOP lower algorithm");
7809 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7811 auto IsMulSupported = [
this](
const LLT Ty) {
7812 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7815 if (IsMulSupported(Ty)) {
7816 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7817 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7819 auto ResTmp = B8Count;
7820 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7821 auto ShiftC =
B.buildConstant(Ty, Shift);
7822 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7823 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7825 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7827 MI.eraseFromParent();
7830 case TargetOpcode::G_CTLS: {
7831 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7835 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7836 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7838 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7844 MI.eraseFromParent();
7865 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7866 LLT Ty = MRI.getType(Dst);
7867 LLT ShTy = MRI.getType(Z);
7874 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7875 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7880 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
7881 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7885 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7898 MI.eraseFromParent();
7904 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7905 LLT Ty = MRI.getType(Dst);
7906 LLT ShTy = MRI.getType(Z);
7909 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7919 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
7920 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7921 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7922 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7923 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7927 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
7930 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7933 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7935 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
7936 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7937 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7940 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7942 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
7944 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7947 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7948 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
7953 MI.eraseFromParent();
7964 LLT Ty = MRI.getType(Dst);
7965 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
7967 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7968 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7971 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
7972 return lowerFunnelShiftAsShifts(
MI);
7976 if (Result == UnableToLegalize)
7977 return lowerFunnelShiftAsShifts(
MI);
7982 auto [Dst, Src] =
MI.getFirst2Regs();
7983 LLT DstTy = MRI.getType(Dst);
7984 LLT SrcTy = MRI.getType(Src);
7988 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7996 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8000 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8004 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8009 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8010 {UnmergeSrc.getReg(0)});
8011 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8012 {UnmergeSrc.getReg(1)});
8015 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8017 MI.eraseFromParent();
8034 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8038 LLT DstTy = MRI.getType(DstReg);
8039 LLT SrcTy = MRI.getType(SrcReg);
8047 SrcTy.getElementCount().divideCoefficientBy(2));
8060 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8072 MI.eraseFromParent();
8081 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8082 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8083 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8084 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8085 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8086 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8087 MI.eraseFromParent();
8092 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8094 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8095 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8100 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8101 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8103 return lowerRotateWithReverseRotate(
MI);
8106 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8107 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8108 bool IsFShLegal =
false;
8109 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8110 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8114 MI.eraseFromParent();
8119 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8122 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8127 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8128 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8129 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8135 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8136 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8138 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8144 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8145 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8147 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8149 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8154 MI.eraseFromParent();
8162 auto [Dst, Src] =
MI.getFirst2Regs();
8167 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8195 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8208 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8212 MI.eraseFromParent();
8220 auto [Dst, Src] =
MI.getFirst2Regs();
8225 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8238 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8240 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8245 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8247 MI.eraseFromParent();
  auto [Dst, Src] = MI.getFirst2Regs();
  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
  auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
  auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
  auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
  auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
  auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
  MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
  MI.eraseFromParent();
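// This is the classic branch-free u64 -> f64 expansion: the low 32 bits are
// OR'ed into the bit pattern of 2^52 (0x4330000000000000) and the high 32
// bits into 2^84 (0x4530000000000000), producing two doubles whose
// mantissas carry the raw bits. Subtracting the constant 2^84 + 2^52
// cancels the offsets, and the final FADD reassembles the exact value.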
  auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
  auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
  MIRBuilder.buildSelect(Dst, Src, True, False);
  MI.eraseFromParent();
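// A boolean source needs no arithmetic at all: G_UITOFP from s1 is a select
// between 1.0 and 0.0. The signed variant below selects -1.0 instead, since
// a sign-extended i1 true is the all-ones value -1.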
  if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
  auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
  MIRBuilder.buildSelect(Dst, Src, True, False);
  MI.eraseFromParent();
  if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
  MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy != S64 && SrcTy != S32)
  if (DstTy != S32 && DstTy != S64)
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
  MI.eraseFromParent();
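// G_FPTOUI with only a signed conversion available uses the standard split:
// convert x directly, and in parallel convert x minus 2^(w-1) and set the
// result's top bit, then select between the two with an FCMP against the
// threshold. The select above is the tail of that expansion.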
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
  auto SignMask =
      MIRBuilder.buildConstant(SrcTy, APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
  MI.eraseFromParent();
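// The sequence decodes an IEEE-754 single by hand: mask the exponent
// (0x7F800000) and mantissa (0x007FFFFF) fields, OR in the implicit leading
// one (0x00800000), then shift the significand left or right depending on
// whether the unbiased exponent exceeds the mantissa width. The final
// (r ^ sign) - sign is branch-free conditional negation, and the last
// select yields 0 whenever the exponent is below the bias, i.e. |x| < 1.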
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
  unsigned SatWidth = DstTy.getScalarSizeInBits();
  APInt MinInt, MaxInt;
  if (AreExactFloatBounds) {
    auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
        SrcTy.changeElementSize(1), Src, MaxC);
    auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
    auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
    MI.eraseFromParent();
    auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
        DstTy.changeElementSize(1), Src, Src);
    MI.eraseFromParent();
  auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
      DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
  MI.eraseFromParent();
      DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
      DstTy.changeElementSize(1), Src, Src);
  MI.eraseFromParent();
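// The saturating conversions clamp instead of producing poison. When the
// integer bounds are exactly representable in the source float type, the
// input is clamped with fcmp+select before a single conversion; otherwise
// the raw result is patched with integer selects against MinInt/MaxInt.
// The trailing compare of Src against itself detects NaN, which must map
// to zero.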
  auto [Dst, Src] = MI.getFirst2Regs();
      MRI.getType(Src).getScalarType() == LLT::scalar(64));
  if (MRI.getType(Src).isVector())
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
  MI.eraseFromParent();
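// lowerFPTRUNC_F64_TO_F16 performs the double-to-half conversion in integer
// arithmetic: the 11-bit f64 exponent (bias 1023) is rebased to the f16
// bias of 15, the mantissa is narrowed with round-to-nearest-even
// adjustments, and NaN/overflow cases are resolved with selects like the
// one above.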
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);
  auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
  MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
  MI.eraseFromParent();
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
  MI.eraseFromParent();
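// lowerMinMax is the canonical cmp+select expansion; e.g. for G_SMIN:
//   %c:_(s1) = G_ICMP intpred(slt), %src0, %src1
//   %dst     = G_SELECT %c, %src0, %src1
// with the predicate (slt/sgt/ult/ugt) chosen from the opcode by
// minMaxToCompare.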
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Cmp->getReg(1));
  auto Zero = MIRBuilder.buildConstant(DstTy, 0);
  auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
  auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
  auto BC = TLI.getBooleanContents(DstTy.isVector(), false);
  if (TLI.preferSelectsOverBooleanArithmetic(
    auto One = MIRBuilder.buildConstant(DstTy, 1);
    auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
    auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
    MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
  unsigned BoolExtOp =
  IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
  IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
  MI.eraseFromParent();
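// A three-way compare (G_SCMP/G_UCMP) returns -1, 0, or 1. The select form
// nests two selects over the gt/lt flags; the boolean-arithmetic form
// extends both i1 results to DstTy and combines them, e.g. as
// (a > b) - (a < b) when booleans are zero-extended. The choice is a
// target preference queried via preferSelectsOverBooleanArithmetic.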
  auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();
  auto NotSignBitMask = MIRBuilder.buildConstant(
  if (Src0Ty == Src1Ty) {
    And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
  } else if (Src0Size > Src1Size) {
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
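// G_FCOPYSIGN is pure bit manipulation: clear Src0's sign bit with the
// inverted sign mask, isolate Src1's sign bit (shifting it into position
// first when the operand widths differ, as above), and OR the two together.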
  switch (MI.getOpcode()) {
  case TargetOpcode::G_FMINNUM:
    NewOp = TargetOpcode::G_FMINNUM_IEEE;
  case TargetOpcode::G_FMINIMUMNUM:
    NewOp = TargetOpcode::G_FMINNUM;
  case TargetOpcode::G_FMAXNUM:
    NewOp = TargetOpcode::G_FMAXNUM_IEEE;
  case TargetOpcode::G_FMAXIMUMNUM:
    NewOp = TargetOpcode::G_FMAXNUM;
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);
  Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
  Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
  MI.eraseFromParent();
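// G_FMINNUM/G_FMAXNUM must quiet signaling NaNs, whereas the *_IEEE forms
// assume canonical inputs. Canonicalizing both operands with
// G_FCANONICALIZE first therefore makes the IEEE variant a valid
// replacement.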
  unsigned Opc = MI.getOpcode();
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Dst);
  bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
  unsigned OpcIeee =
      IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
  unsigned OpcNonIeee =
      IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
  bool MinMaxMustRespectOrderedZero = false;
  if (LI.isLegalOrCustom({OpcIeee, Ty})) {
    MinMaxMustRespectOrderedZero = true;
  } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
    Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
  LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
  NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
  Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
  const unsigned Flags = MI.getFlags();
  auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
  MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
  auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
  MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
  Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
  MI.eraseFromParent();
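// G_FMINIMUM/G_FMAXIMUM differ from the *NUM forms: NaN propagates (any NaN
// operand yields NaN) and -0.0 orders below +0.0. The selects above retrofit
// both properties onto whichever min/max the target supports: one select
// substitutes a NaN constant on unordered inputs, and the fcZero class
// tests repair the signed-zero ordering when required.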
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  auto [DstReg, X] = MI.getFirst2Regs();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
  auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
  MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
  MI.eraseFromParent();
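// round(x) is computed as trunc(x) plus a correction: the offset is 1.0
// when |x - trunc(x)| >= 0.5 and 0.0 otherwise, and fcopysign gives the
// offset the sign of x so halfway cases round away from zero.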
  auto [DstReg, SrcReg] = MI.getFirst2Regs();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
      SrcReg, Zero, Flags);
      SrcReg, Trunc, Flags);
  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
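// floor(x) = trunc(x) - 1.0 exactly when x is negative and fractional
// (x != trunc(x)); the two elided compares compute those conditions and
// AddVal becomes -1.0 or 0.0 accordingly (sitofp of an s1 true gives -1.0).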
  const unsigned NumOps = MI.getNumOperands();
  auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
  unsigned PartSize = Src0Ty.getSizeInBits();
  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
        MRI.createGenericVirtualRegister(WideTy);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  if (DstTy.isPointer()) {
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
            DstTy.getAddressSpace())) {
  MI.eraseFromParent();
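// Without a legal wide merge, G_MERGE_VALUES is built arithmetically:
// zero-extend each part to the wide type, shift it to its bit offset
// (I - 1) * PartSize, and OR it into the accumulator. Pointer destinations
// get a final inttoptr, which is skipped for non-integral address spaces
// where such casts are not permitted.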
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  LLT IntTy = MRI.getType(SrcReg);
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
  MI.eraseFromParent();
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
  LLT VecTy = MRI.getType(SrcVec);
    SrcRegs[IdxVal] = MI.getOperand(2).getReg();
    MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
    MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
    MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
  int64_t Offset = IdxVal * EltBytes;
  MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
  MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  MI.eraseFromParent();
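// With a constant in-range index the element is handled purely in registers
// via unmerge/remerge. The general case spills the vector to a stack
// temporary, addresses the element at StackTemp + IdxVal * EltBytes, and
// either loads it (extract) or stores it and reloads the vector (insert).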
  auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
      MI.getFirst3RegLLTs();
  for (int Idx : Mask) {
    if (!Undef.isValid())
      Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
    assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
    int NumElts = Src0Ty.getNumElements();
    Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
    int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
    auto [It, Inserted] = CachedExtract.try_emplace(Idx);
    auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
    MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
  assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
      MI.getFirst4RegLLTs();
  if (VecTy.isScalableVector())
  auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
      MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
  MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
  std::optional<APInt> PassthruSplatVal =
  if (PassthruSplatVal.has_value()) {
    MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
  } else if (HasPassthru) {
    auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
    Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
    MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
  unsigned NumElmts = VecTy.getNumElements();
  for (unsigned I = 0; I < NumElmts; ++I) {
    auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
    MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
    auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
    OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
    if (HasPassthru && I == NumElmts - 1) {
      auto AllLanesSelected = MIRBuilder.buildICmp(
      OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
                                     {OutPos, EndOfVector});
      MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
      MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
  MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
  MI.eraseFromParent();
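// G_VECTOR_COMPRESS goes through the stack: each element is stored
// unconditionally at StackPtr + OutPos, but OutPos advances only by the
// zero-extended mask bit, so selected elements pack densely at the front.
// With a passthru, its contents are pre-stored and the final write is
// clamped and selected so trailing lanes keep passthru values.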
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
  if (Alignment > Align(1)) {
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
  const auto &MF = *MI.getMF();
  Register AllocSize = MI.getOperand(1).getReg();
  LLT PtrTy = MRI.getType(Dst);
  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  MI.eraseFromParent();
  Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
  MI.eraseFromParent();
  Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
  MI.eraseFromParent();
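// G_DYN_STACKALLOC reads the stack pointer, subtracts the allocation size,
// aligns the result downward by masking the low bits when Alignment > 1,
// and writes it back. G_STACKSAVE and G_STACKRESTORE reduce to a copy from
// or to the same physical SP register.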
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();
  if (SrcTy.isVector()) {
    unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
    unsigned DstSize = DstTy.getSizeInBits();
    if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
        (Offset + DstSize <= SrcTy.getSizeInBits())) {
      auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
      for (unsigned Idx = Offset / SrcEltSize;
           Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
        SubVectorElts.push_back(Unmerge.getReg(Idx));
      if (SubVectorElts.size() == 1)
        MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
        MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
      MI.eraseFromParent();
  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
    auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
    MI.eraseFromParent();
  auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);
  if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
    auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
    for (; Idx < Offset / EltSize; ++Idx) {
      DstElts.push_back(UnmergeSrc.getReg(Idx));
    auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
    for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
      DstElts.push_back(UnmergeInsertSrc.getReg(i));
      DstElts.push_back(UnmergeSrc.getReg(Idx));
    MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
    MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
  LLT IntDstTy = DstTy;
  Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
  MI.eraseFromParent();
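// The scalar fallback for G_INSERT is a classic bitfield insert: extend the
// inserted value, shift it to Offset, clear the corresponding bits of the
// destination with an AND against the inverted field mask, and OR the two
// halves together.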
  auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
      MI.getFirst4RegLLTs();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
  LLT BoolTy = Dst1Ty;
  Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
  auto ResultLowerThanLHS =
  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
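// Signed overflow of a + b occurs iff (result <s lhs) differs from
// (rhs <s 0); for subtraction the right-hand condition flips to (rhs >s 0).
// The XOR above combines exactly those two i1 conditions into the overflow
// output.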
  auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
  const LLT Ty = MRI.getType(Res);
  auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
  auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
  auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
  MI.eraseFromParent();
  auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
  const LLT Ty = MRI.getType(Res);
  auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
  auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
  auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
  auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
  MI.eraseFromParent();
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  switch (MI.getOpcode()) {
  case TargetOpcode::G_UADDSAT:
    BaseOp = TargetOpcode::G_ADD;
  case TargetOpcode::G_SADDSAT:
    BaseOp = TargetOpcode::G_ADD;
  case TargetOpcode::G_USUBSAT:
    BaseOp = TargetOpcode::G_SUB;
  case TargetOpcode::G_SSUBSAT:
    BaseOp = TargetOpcode::G_SUB;
  uint64_t NumBits = Ty.getScalarSizeInBits();
  auto NegOne = MIRBuilder.buildConstant(Ty, -1);
  MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
  MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  MI.eraseFromParent();
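// With legal min/max, saturation needs no overflow flag at all, e.g.
// usubsat(a, b) == a - umin(a, b) and uaddsat(a, b) == a + umin(~a, b),
// since ~a is precisely the headroom left below UINT_MAX.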
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_UADDSAT:
    OverflowOp = TargetOpcode::G_UADDO;
  case TargetOpcode::G_SADDSAT:
    OverflowOp = TargetOpcode::G_SADDO;
  case TargetOpcode::G_USUBSAT:
    OverflowOp = TargetOpcode::G_USUBO;
  case TargetOpcode::G_SSUBSAT:
    OverflowOp = TargetOpcode::G_SSUBO;
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  uint64_t NumBits = Ty.getScalarSizeInBits();
  auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
  auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
  Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  MI.eraseFromParent();
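// The overflow-op form computes the wrapped result plus a flag, then selects
// a clamp value when the flag is set. For signed saturation the clamp is
// derived from the wrapped result's sign: ashr by w-1 gives 0 or -1, and
// adding INT_MIN turns that into INT_MIN or INT_MAX. Unsigned saturation
// clamps to all-ones on uaddsat overflow and to zero on usubsat underflow.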
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(Res);
  auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
  auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
  SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
  MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
  MI.eraseFromParent();
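// Saturating shifts detect overflow by shifting back: if (x << s) >> s
// (ashr for the signed form, lshr for unsigned) no longer equals x, bits
// were lost and the saturation constant is selected instead.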
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT Ty = MRI.getType(Src);
  unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
  auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
  auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
  auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
  auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
    auto Mask = MIRBuilder.buildConstant(Ty, APMask);
    auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
    auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
    auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
    Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
    auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
    auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
    Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
  Res.getInstr()->getOperand(0).setReg(Dst);
  MI.eraseFromParent();
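// G_BSWAP is expanded pairwise: byte i trades places with its mirror byte
// SizeInBytes - 1 - i. The outermost pair needs no masking because the
// shifts themselves discard the other bytes; inner pairs isolate byte i
// with the 0xFF << (i * 8) mask before and after shifting by
// BaseShiftAmt - 16 * i.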
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
  auto [Dst, Src] = MI.getFirst2Regs();
  const LLT SrcTy = MRI.getType(Src);
  unsigned Size = SrcTy.getScalarSizeInBits();
  unsigned VSize = SrcTy.getSizeInBits();
  if (SrcTy.isVector() && (VSize % 8 == 0) &&
      (LI.isLegal({TargetOpcode::G_BITREVERSE,
                   {LLT::fixed_vector(VSize / 8, 8),
                    LLT::fixed_vector(VSize / 8, 8)}}))) {
    auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
    auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
    auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
    MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
  for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
    Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
    Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
    Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
    Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
  MI.eraseFromParent();
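// Two strategies: when a byte-vector G_BITREVERSE is legal, reverse the
// byte order with G_BSWAP and then reverse the bits inside each byte.
// Otherwise fall back to a per-bit loop that moves bit I to position
// J = Size - 1 - I with a shift, a mask, and an OR on each step (the SwapN
// helper above performs the analogous power-of-two group swaps).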
  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;
  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const LLT Ty = MRI.getType(ValReg);
      cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
      (IsRead ? "llvm.read_register" : "llvm.write_register"),
      Fn, MI.getDebugLoc()));
  MI.eraseFromParent();
  MI.eraseFromParent();
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  LLT OrigTy = MRI.getType(Result);
  auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
  auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
  auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
  MI.eraseFromParent();
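// mulh via a double-width multiply: extend both operands to 2*w bits (sext
// for G_SMULH, zext for G_UMULH), multiply, shift the product right by w,
// and truncate back to the original type.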
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  MI.eraseFromParent();
  unsigned BitSize = SrcTy.getScalarSizeInBits();
  auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
  APInt ExpMask = Inf;
  auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
  auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
  auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
  auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
  auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
  auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
  auto Res = MIRBuilder.buildConstant(DstTy, 0);
  LLT DstTyCopy = DstTy;
    Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
  auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
  Mask &= ~PartialCheck;
  else if (PartialCheck == fcZero)
  auto OneC = MIRBuilder.buildConstant(IntTy, 1);
  auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
  auto SubnormalRes =
      MIRBuilder.buildConstant(IntTy, AllOneMantissa));
  SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
  appendToRes(SubnormalRes);
  else if (PartialCheck == fcInf)
  auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
  auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
  if (PartialCheck == fcNan) {
  } else if (PartialCheck == fcQNan) {
      Abs, InfWithQnanBitC);
  appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
  APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
      IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
  APInt MaxExpMinusOne = ExpMask - ExpLSB;
      MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
  NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
      DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
  NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
  appendToRes(NormalRes);
  MI.eraseFromParent();
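// G_IS_FPCLASS decomposes into integer tests on the bit pattern: Abs drops
// the sign; |x| > Inf identifies NaN, with the quiet bit separating qNaN
// from sNaN; |x| == Inf is infinity; unsigned (Abs - 1) < AllOneMantissa
// catches subnormals; and a biased-exponent range check covers normals.
// appendToRes ORs each partial class result into Res.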
  auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
      MI.getFirst4RegLLTs();
  bool IsEltPtr = DstTy.isPointerOrPointerVector();
  Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
  Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
  if (MaskTy.isScalar()) {
    MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
    MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
    if (DstTy.isVector()) {
      auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
      MaskReg = ShufSplat.getReg(0);
  } else if (!DstTy.isVector()) {
  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
  auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
  auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
  auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
  MI.eraseFromParent();
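// Once the condition is widened to an all-ones/all-zeros mask of the value
// type, G_SELECT is plain bit math:
//   dst = (op1 & mask) | (op2 & ~mask)
// Pointer operands are round-tripped through integers so the AND/OR stay
// well-typed.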
  unsigned Opcode = MI.getOpcode();
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
                                        : TargetOpcode::G_UDIV,
      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
                                        : TargetOpcode::G_UREM,
      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MI.eraseFromParent();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
  MI.eraseFromParent();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(SrcReg);
  auto Zero = MIRBuilder.buildConstant(Ty, 0);
  MI.eraseFromParent();
  Register SrcReg = MI.getOperand(1).getReg();
  Register DestReg = MI.getOperand(0).getReg();
  auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
  auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
  MI.eraseFromParent();
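// Three G_ABS expansions appear here: add-and-xor computes (x + m) ^ m with
// m = x >>s (w-1), which is 0 or all-ones; the max form is smax(x, 0 - x);
// and the cmp/select form picks between x and its negation based on the
// sign test.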
  assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
          MI.getOpcode() == TargetOpcode::G_ABDU) &&
         "Expected G_ABDS or G_ABDU instruction");
  auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(LHS);
  MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
  MI.eraseFromParent();
  assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
          MI.getOpcode() == TargetOpcode::G_ABDU) &&
         "Expected G_ABDS or G_ABDU instruction");
  auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
  LLT Ty = MRI.getType(LHS);
  if (MI.getOpcode() == TargetOpcode::G_ABDS) {
    MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
    MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
    MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
    MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
  MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
  MI.eraseFromParent();
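// Absolute difference mirrors abs: either select between a - b and b - a on
// a compare, or, when min/max are legal, use
// abd(a, b) = max(a, b) - min(a, b) with the signedness of the min/max pair
// matching G_ABDS versus G_ABDU.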
  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(DstReg);
  MI.eraseFromParent();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  LLT DstTy = MRI.getType(SrcReg);
  if (SrcTy.isScalar()) {
  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
  Register ListPtr = MI.getOperand(1).getReg();
  LLT PtrTy = MRI.getType(ListPtr);
  auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
  const Align A(MI.getOperand(2).getImm());
  if (A > TLI.getMinStackArgumentAlignment()) {
        MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
    auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
    auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
    VAList = AndDst.getReg(0);
  LLT LLTTy = MRI.getType(Dst);
      MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
  auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
  MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
  Align EltAlignment = DL.getABITypeAlign(Ty);
  MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
  MI.eraseFromParent();
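// lowerVAArg follows the generic va_arg protocol: load the current pointer
// from the va_list slot, round it up to the requested alignment via
// ptradd(p, a - 1) followed by masking the low log2(a) bits, load the
// argument, then store back the pointer advanced by the type's alloc size.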
static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
                                          unsigned Limit, const MemOp &Op,
                                          unsigned DstAS, unsigned SrcAS,
                                          const AttributeList &FuncAttributes,
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
  if (Op.isFixedDstAlign())
    while (Op.getDstAlign() < Ty.getSizeInBytes() &&
  assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
  unsigned NumMemOps = 0;
  unsigned TySize = Ty.getSizeInBytes();
  while (TySize > Size) {
    assert(NewTySize > 0 && "Could not find appropriate type");
    if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
        VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
    TySize = NewTySize;
    if (++NumMemOps > Limit)
    MemOps.push_back(Ty);
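// This mirrors SelectionDAG's getOptimalMemOpLowering: start from the
// widest type the target suggests, then repeatedly step down to smaller
// integer types while the remaining Size is not covered, optionally
// emitting one overlapping tail access when allowOverlap() permits, and
// give up once more than Limit operations would be required.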
  unsigned NumBits = Ty.getScalarSizeInBits();
  if (!Ty.isVector() && ValVRegAndVal) {
    APInt Scalar = ValVRegAndVal->Value.trunc(8);
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
                                    uint64_t KnownLen, Align Alignment,
  auto &MF = *MI.getParent()->getParent();
  assert(KnownLen != 0 && "Have a zero length memset length!");
  bool DstAlignCanChange = false;
  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
    DstAlignCanChange = true;
  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  MachineIRBuilder MIB(MI);
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
      LargestTy = MemOps[i];
  LLT PtrTy = MRI.getType(Dst);
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    LLT Ty = MemOps[I];
    if (TySize > Size) {
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
        TLI.isTruncateFree(LargestVT, VT))
      Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
      Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
    MIB.buildStore(Value, Ptr, *StoreMMO);
  MI.eraseFromParent();
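// A known-length G_MEMSET becomes straight-line stores: the fill byte is
// splatted across the largest legal store type (getMemsetValue uses an
// APInt splat for constants), narrower tail stores reuse a truncation of
// that value when truncation is free, and an oversized final store is slid
// backwards (the DstOff adjustment) so it stays inside the buffer.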
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  auto [Dst, Src, Len] = MI.getFirst3Regs();
  const auto *MMOIt = MI.memoperands_begin();
  bool IsVolatile = MemOp->isVolatile();
      "inline memcpy with dynamic size is not yet supported");
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  Align DstAlign = DstMMO.getBaseAlign();
  Align SrcAlign = SrcMMO.getBaseAlign();
  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                                         Align SrcAlign, bool IsVolatile) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  return lowerMemcpy(MI, Dst, Src, KnownLen,
                     std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
                                   Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  assert(KnownLen != 0 && "Have a zero length memcpy length!");
  bool DstAlignCanChange = false;
  Align Alignment = std::min(DstAlign, SrcAlign);
    DstAlignCanChange = true;
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (!TRI->hasStackRealignment(MF))
      NewAlign = std::min(NewAlign, *StackAlign);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
  MachineIRBuilder MIB(MI);
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  MI.eraseFromParent();
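// Inline memcpy interleaves one load with one store per chunk, which is
// safe because the operands may not overlap. G_MEMCPY_INLINE funnels into
// this same path with an unlimited store budget, as inlining is mandatory
// there.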
                                    uint64_t KnownLen, Align DstAlign,
                                    Align SrcAlign,
  auto &MF = *MI.getParent()->getParent();
  assert(KnownLen != 0 && "Have a zero length memmove length!");
  bool DstAlignCanChange = false;
  Align Alignment = std::min(DstAlign, SrcAlign);
  MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
    DstAlignCanChange = true;
  unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
  MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (!TRI->hasStackRealignment(MF))
      if (MaybeAlign StackAlign = DL.getStackAlignment())
        NewAlign = std::min(NewAlign, *StackAlign);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
  MachineIRBuilder MIB(MI);
  unsigned CurrOffset = 0;
  SmallVector<Register, 16> LoadVals;
  for (auto CopyTy : MemOps) {
    if (CurrOffset != 0) {
      LLT SrcTy = MRI.getType(Src);
      LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
    if (CurrOffset != 0) {
      LLT DstTy = MRI.getType(Dst);
      StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
  MI.eraseFromParent();
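// Unlike memcpy, memmove must tolerate overlapping ranges, so every chunk
// is loaded into LoadVals first and only afterwards are the stores issued;
// overlapping tail accesses are likewise not requested from the sizing
// helper.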
  const unsigned Opc = MI.getOpcode();
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy like instruction");
  auto MMOIt = MI.memoperands_begin();
  auto [Dst, Src, Len] = MI.getFirst3Regs();
  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *(++MMOIt);
    SrcAlign = MemOp->getBaseAlign();
  if (!LenVRegAndVal)
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
  if (MaxLen && KnownLen > MaxLen)
  bool IsVolatile = MemOp->isVolatile();
  if (Opc == TargetOpcode::G_MEMCPY) {
    auto &MF = *MI.getParent()->getParent();
    uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
  if (Opc == TargetOpcode::G_MEMMOVE)
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                        IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
constexpr LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes: splits MI into multiple instances of the same operation, each acting on NumElts elements.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
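For example, widening a 16-bit add to s32 conceptually rewrites as follows (a hedged sketch, not the exact in-tree code; MI is assumed to be a G_ADD on s16):

    LLT S32 = LLT::scalar(32);
    auto LHS = MIRBuilder.buildAnyExt(S32, MI.getOperand(1).getReg());
    auto RHS = MIRBuilder.buildAnyExt(S32, MI.getOperand(2).getReg());
    auto Sum = MIRBuilder.buildAdd(S32, LHS, RHS);
    // Truncate back to the original s16 destination register.
    MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Sum);
    MI.eraseFromParent();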
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
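A hedged usage sketch (DstReg, SrcReg, Ctx, Helper, and the choice of "sinf" are illustrative assumptions; the braced initializers build CallLowering::ArgInfo values):

    // Emit a call to the C function: float sinf(float).
    auto Status = Helper.createLibcall(
        "sinf", {DstReg, Type::getFloatTy(Ctx), 0},
        {{SrcReg, Type::getFloatTy(Ctx), 0}}, CallingConv::C, LocObserver);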
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
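The underlying computation is plain byte arithmetic; a hedged builder sketch (IdxTy, PtrTy, VecPtr, and Index are assumptions, and out-of-range indices need separate handling that this sketch omits):

    LLT IdxTy = LLT::scalar(64);
    uint64_t EltBytes =
        VecTy.getElementType().getSizeInBytes().getFixedValue();
    auto Scale = MIRBuilder.buildConstant(IdxTy, EltBytes);
    auto Offset = MIRBuilder.buildMul(IdxTy, Index, Scale);
    auto EltPtr = MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Offset);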
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
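A hedged caller-side sketch (the worklist management of the real Legalizer pass is omitted; the three results are the LegalizeResult values documented below):

    switch (Helper.legalizeInstrStep(MI, LocObserver)) {
    case LegalizerHelper::Legalized:
      break; // MI was rewritten; new instructions may need legalizing too
    case LegalizerHelper::AlreadyLegal:
      break; // nothing to do
    case LegalizerHelper::UnableToLegalize:
      report_fatal_error("unable to legalize instruction");
    }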
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
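The opcode is picked from the destination and source types; a hedged example (LoReg and HiReg are assumed to be s32 values):

    // An s64 destination built from two s32 pieces selects
    // G_MERGE_VALUES; a vector destination would select
    // G_BUILD_VECTOR (scalar sources) or G_CONCAT_VECTORS
    // (sub-vector sources) instead.
    auto Wide = MIRBuilder.buildMergeLikeInstr(LLT::scalar(64),
                                               {LoReg, HiReg});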
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for an N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
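For plain scalars this reduces to the arithmetic LCM of the bit widths (a hedged example; vector and pointer cases follow more involved rules):

    assert(getLCMType(LLT::scalar(96), LLT::scalar(64)) == LLT::scalar(192));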
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
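A hedged usage sketch (WideReg is assumed to be a 128-bit virtual register):

    SmallVector<Register, 4> Parts;
    extractParts(WideReg, LLT::scalar(32), 4, Parts, MIRBuilder, MRI);
    // Parts[0] now holds bits [31:0], Parts[1] bits [63:32], and so on.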
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
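For example:

    assert(alignTo(10, Align(8)) == 16 && alignTo(16, Align(8)) == 16);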
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for an N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
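For plain scalars the result is the arithmetic GCD of the bit widths (a hedged example; vector element types add further constraints):

    assert(getGCDType(LLT::scalar(96), LLT::scalar(64)) == LLT::scalar(32));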
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
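Note the strict inequality, which distinguishes it from PowerOf2Ceil above:

    assert(NextPowerOf2(8) == 16); // strictly greater
    assert(PowerOf2Ceil(8) == 8);  // greater than or equal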
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.