43#define DEBUG_TYPE "legalizer"
56static std::pair<int, int>
62 unsigned NumParts =
Size / NarrowSize;
63 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
66 if (LeftoverSize == 0)
71 if (LeftoverSize % EltSize != 0)
80 return std::make_pair(NumParts, NumLeftover);
88 switch (Ty.getSizeInBits()) {
129 auto Step = LI.getAction(
MI, MRI);
130 switch (Step.Action) {
145 return bitcast(
MI, Step.TypeIdx, Step.NewType);
148 return lower(
MI, Step.TypeIdx, Step.NewType);
157 return LI.legalizeCustom(*
this,
MI, LocObserver) ?
Legalized
165void LegalizerHelper::insertParts(
Register DstReg,
187 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
189 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
203 LLT Ty = MRI.getType(
Reg);
211void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
214 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
215 appendVectorElts(AllElts, PartRegs[i]);
218 if (!MRI.getType(Leftover).isVector())
221 appendVectorElts(AllElts, Leftover);
223 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
229 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
231 const int StartIdx = Regs.
size();
232 const int NumResults =
MI.getNumOperands() - 1;
234 for (
int I = 0;
I != NumResults; ++
I)
235 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
247 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
254 LLT SrcTy = MRI.getType(SrcReg);
256 extractGCDType(Parts, GCDTy, SrcReg);
260LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
262 unsigned PadStrategy) {
267 int NumOrigSrc = VRegs.
size();
273 if (NumOrigSrc < NumParts * NumSubParts) {
274 if (PadStrategy == TargetOpcode::G_ZEXT)
275 PadReg =
MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
276 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 PadReg =
MIRBuilder.buildUndef(GCDTy).getReg(0);
279 assert(PadStrategy == TargetOpcode::G_SEXT);
284 PadReg =
MIRBuilder.buildAShr(GCDTy, VRegs.
back(), ShiftAmt).getReg(0);
300 for (
int I = 0;
I != NumParts; ++
I) {
301 bool AllMergePartsArePadding =
true;
304 for (
int J = 0; J != NumSubParts; ++J) {
305 int Idx =
I * NumSubParts + J;
306 if (Idx >= NumOrigSrc) {
307 SubMerge[J] = PadReg;
311 SubMerge[J] = VRegs[Idx];
314 AllMergePartsArePadding =
false;
320 if (AllMergePartsArePadding && !AllPadReg) {
321 if (PadStrategy == TargetOpcode::G_ANYEXT)
322 AllPadReg =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
323 else if (PadStrategy == TargetOpcode::G_ZEXT)
324 AllPadReg =
MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
333 Remerge[
I] = AllPadReg;
337 if (NumSubParts == 1)
338 Remerge[
I] = SubMerge[0];
340 Remerge[
I] =
MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
343 if (AllMergePartsArePadding && !AllPadReg)
344 AllPadReg = Remerge[
I];
347 VRegs = std::move(Remerge);
351void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
353 LLT DstTy = MRI.getType(DstReg);
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
363 auto Remerge =
MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
372 UnmergeDefs[0] = DstReg;
373 for (
unsigned I = 1;
I != NumDefs; ++
I)
374 UnmergeDefs[
I] = MRI.createGenericVirtualRegister(DstTy);
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
385#define RTLIBCASE_INT(LibcallPrefix) \
389 return RTLIB::LibcallPrefix##32; \
391 return RTLIB::LibcallPrefix##64; \
393 return RTLIB::LibcallPrefix##128; \
395 llvm_unreachable("unexpected size"); \
399#define RTLIBCASE(LibcallPrefix) \
403 return RTLIB::LibcallPrefix##32; \
405 return RTLIB::LibcallPrefix##64; \
407 return RTLIB::LibcallPrefix##80; \
409 return RTLIB::LibcallPrefix##128; \
411 llvm_unreachable("unexpected size"); \
416 case TargetOpcode::G_LROUND:
418 case TargetOpcode::G_LLROUND:
420 case TargetOpcode::G_MUL:
422 case TargetOpcode::G_SDIV:
424 case TargetOpcode::G_UDIV:
426 case TargetOpcode::G_SREM:
428 case TargetOpcode::G_UREM:
430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
432 case TargetOpcode::G_FADD:
434 case TargetOpcode::G_FSUB:
436 case TargetOpcode::G_FMUL:
438 case TargetOpcode::G_FDIV:
440 case TargetOpcode::G_FEXP:
442 case TargetOpcode::G_FEXP2:
444 case TargetOpcode::G_FEXP10:
446 case TargetOpcode::G_FREM:
448 case TargetOpcode::G_FPOW:
450 case TargetOpcode::G_FPOWI:
452 case TargetOpcode::G_FMA:
454 case TargetOpcode::G_FSIN:
456 case TargetOpcode::G_FCOS:
458 case TargetOpcode::G_FTAN:
460 case TargetOpcode::G_FASIN:
462 case TargetOpcode::G_FACOS:
464 case TargetOpcode::G_FATAN:
466 case TargetOpcode::G_FATAN2:
468 case TargetOpcode::G_FSINH:
470 case TargetOpcode::G_FCOSH:
472 case TargetOpcode::G_FTANH:
474 case TargetOpcode::G_FSINCOS:
476 case TargetOpcode::G_FMODF:
478 case TargetOpcode::G_FLOG10:
480 case TargetOpcode::G_FLOG:
482 case TargetOpcode::G_FLOG2:
484 case TargetOpcode::G_FLDEXP:
486 case TargetOpcode::G_FCEIL:
488 case TargetOpcode::G_FFLOOR:
490 case TargetOpcode::G_FMINNUM:
492 case TargetOpcode::G_FMAXNUM:
494 case TargetOpcode::G_FMINIMUMNUM:
496 case TargetOpcode::G_FMAXIMUMNUM:
498 case TargetOpcode::G_FSQRT:
500 case TargetOpcode::G_FRINT:
502 case TargetOpcode::G_FNEARBYINT:
504 case TargetOpcode::G_INTRINSIC_TRUNC:
506 case TargetOpcode::G_INTRINSIC_ROUND:
508 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
510 case TargetOpcode::G_INTRINSIC_LRINT:
512 case TargetOpcode::G_INTRINSIC_LLRINT:
532 AttributeList CallerAttrs =
F.getAttributes();
533 if (AttrBuilder(
F.getContext(), CallerAttrs.getRetAttrs())
534 .removeAttribute(Attribute::NoAlias)
535 .removeAttribute(Attribute::NonNull)
540 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
541 CallerAttrs.hasRetAttr(Attribute::SExt))
552 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
559 if (!VReg.
isVirtual() || VReg !=
Next->getOperand(1).getReg())
567 if (Ret ==
MBB.instr_end() || !Ret->isReturn())
570 if (Ret->getNumImplicitOperands() != 1)
573 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
590 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
595 Info.OrigRet = Result;
598 (Result.Ty->isVoidTy() ||
599 Result.Ty ==
MIRBuilder.getMF().getFunction().getReturnType()) &&
607 if (
MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
618 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
622 Next->eraseFromParent();
623 }
while (
MI->getNextNode());
638 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(
Libcall);
639 if (LibcallImpl == RTLIB::Unsupported)
643 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
657 Args.push_back({MO.getReg(), OpType, 0});
676 unsigned AddrSpace =
DL.getAllocaAddrSpace();
694 if (LibcallResult != LegalizeResult::Legalized)
702 MIRBuilder.
buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
703 MIRBuilder.
buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
704 MI.eraseFromParent();
719 LLT DstTy = MRI.getType(DstFrac);
724 unsigned AddrSpace =
DL.getAllocaAddrSpace();
725 MachinePointerInfo PtrInfo;
734 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
737 if (LibcallResult != LegalizeResult::Legalized)
743 MIRBuilder.
buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
744 MI.eraseFromParent();
755 case TargetOpcode::G_FPEXT:
757 case TargetOpcode::G_FPTRUNC:
759 case TargetOpcode::G_FPTOSI:
761 case TargetOpcode::G_FPTOUI:
763 case TargetOpcode::G_SITOFP:
765 case TargetOpcode::G_UITOFP:
775 if (FromType->isIntegerTy()) {
776 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
777 Arg.
Flags[0].setSExt();
779 Arg.
Flags[0].setZExt();
790 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
794 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
798 LLT OpLLT = MRI.getType(Reg);
799 Type *OpTy =
nullptr;
804 Args.push_back({Reg, OpTy, 0});
807 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
808 RTLIB::Libcall RTLibcall;
809 unsigned Opc =
MI.getOpcode();
811 case TargetOpcode::G_BZERO:
812 RTLibcall = RTLIB::BZERO;
814 case TargetOpcode::G_MEMCPY:
815 RTLibcall = RTLIB::MEMCPY;
816 Args[0].Flags[0].setReturned();
818 case TargetOpcode::G_MEMMOVE:
819 RTLibcall = RTLIB::MEMMOVE;
820 Args[0].Flags[0].setReturned();
822 case TargetOpcode::G_MEMSET:
823 RTLibcall = RTLIB::MEMSET;
824 Args[0].Flags[0].setReturned();
833 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
836 if (RTLibcallImpl == RTLIB::Unsupported) {
843 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
850 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
857 if (Info.LoweredTailCall) {
858 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
868 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
869 "Expected instr following MI to be return or debug inst?");
872 Next->eraseFromParent();
873 }
while (
MI.getNextNode());
883 unsigned Opc =
MI.getOpcode();
885 auto &MMO = AtomicMI.getMMO();
886 auto Ordering = MMO.getMergedOrdering();
887 LLT MemType = MMO.getMemoryType();
890 return RTLIB::UNKNOWN_LIBCALL;
892#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
894 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
899 return getOutlineAtomicHelper(LC, Ordering, MemSize);
901 case TargetOpcode::G_ATOMICRMW_XCHG: {
902 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
903 return getOutlineAtomicHelper(LC, Ordering, MemSize);
905 case TargetOpcode::G_ATOMICRMW_ADD:
906 case TargetOpcode::G_ATOMICRMW_SUB: {
907 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
908 return getOutlineAtomicHelper(LC, Ordering, MemSize);
910 case TargetOpcode::G_ATOMICRMW_AND: {
911 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
912 return getOutlineAtomicHelper(LC, Ordering, MemSize);
914 case TargetOpcode::G_ATOMICRMW_OR: {
915 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
916 return getOutlineAtomicHelper(LC, Ordering, MemSize);
918 case TargetOpcode::G_ATOMICRMW_XOR: {
919 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
920 return getOutlineAtomicHelper(LC, Ordering, MemSize);
923 return RTLIB::UNKNOWN_LIBCALL;
936 unsigned Opc =
MI.getOpcode();
938 case TargetOpcode::G_ATOMIC_CMPXCHG:
939 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
942 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
943 MI.getFirst4RegLLTs();
946 if (
Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
947 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
948 NewLLT) =
MI.getFirst5RegLLTs();
958 case TargetOpcode::G_ATOMICRMW_XCHG:
959 case TargetOpcode::G_ATOMICRMW_ADD:
960 case TargetOpcode::G_ATOMICRMW_SUB:
961 case TargetOpcode::G_ATOMICRMW_AND:
962 case TargetOpcode::G_ATOMICRMW_OR:
963 case TargetOpcode::G_ATOMICRMW_XOR: {
964 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
967 if (
Opc == TargetOpcode::G_ATOMICRMW_AND)
971 else if (
Opc == TargetOpcode::G_ATOMICRMW_SUB)
986 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
988 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
991 if (RTLibcallImpl == RTLIB::Unsupported) {
998 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1012static RTLIB::Libcall
1014 RTLIB::Libcall RTLibcall;
1015 switch (
MI.getOpcode()) {
1016 case TargetOpcode::G_GET_FPENV:
1017 RTLibcall = RTLIB::FEGETENV;
1019 case TargetOpcode::G_SET_FPENV:
1020 case TargetOpcode::G_RESET_FPENV:
1021 RTLibcall = RTLIB::FESETENV;
1023 case TargetOpcode::G_GET_FPMODE:
1024 RTLibcall = RTLIB::FEGETMODE;
1026 case TargetOpcode::G_SET_FPMODE:
1027 case TargetOpcode::G_RESET_FPMODE:
1028 RTLibcall = RTLIB::FESETMODE;
1060 LLT StateTy = MRI.getType(Dst);
1063 MachinePointerInfo TempPtrInfo;
1067 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1072 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1080 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1098 LLT StateTy = MRI.getType(Src);
1101 MachinePointerInfo TempPtrInfo;
1110 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver,
nullptr);
1122static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1124#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1128 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1130 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1132 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1134 llvm_unreachable("unexpected size"); \
1165 LLT OpLLT = MRI.getType(
Cmp->getLHSReg());
1168 OpLLT != MRI.getType(
Cmp->getRHSReg()))
1175 LLT DstTy = MRI.getType(DstReg);
1176 const auto Cond =
Cmp->getCond();
1181 const auto BuildLibcall = [&](
const RTLIB::Libcall
Libcall,
1186 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1190 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1197 .buildICmp(ICmpPred, Res, Temp,
MIRBuilder.buildConstant(TempLLT, 0))
1203 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1205 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1218 const auto [OeqLibcall, OeqPred] =
1220 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1222 const auto [UnoLibcall, UnoPred] =
1224 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1239 const auto [OeqLibcall, OeqPred] =
1244 const auto [UnoLibcall, UnoPred] =
1249 if (NotOeq && NotUno)
1268 const auto [InversedLibcall, InversedPred] =
1270 if (!BuildLibcall(InversedLibcall,
1295 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1297 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &
MI);
1311 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
1313 switch (
MI.getOpcode()) {
1316 case TargetOpcode::G_MUL:
1317 case TargetOpcode::G_SDIV:
1318 case TargetOpcode::G_UDIV:
1319 case TargetOpcode::G_SREM:
1320 case TargetOpcode::G_UREM:
1321 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1322 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1330 case TargetOpcode::G_FADD:
1331 case TargetOpcode::G_FSUB:
1332 case TargetOpcode::G_FMUL:
1333 case TargetOpcode::G_FDIV:
1334 case TargetOpcode::G_FMA:
1335 case TargetOpcode::G_FPOW:
1336 case TargetOpcode::G_FREM:
1337 case TargetOpcode::G_FCOS:
1338 case TargetOpcode::G_FSIN:
1339 case TargetOpcode::G_FTAN:
1340 case TargetOpcode::G_FACOS:
1341 case TargetOpcode::G_FASIN:
1342 case TargetOpcode::G_FATAN:
1343 case TargetOpcode::G_FATAN2:
1344 case TargetOpcode::G_FCOSH:
1345 case TargetOpcode::G_FSINH:
1346 case TargetOpcode::G_FTANH:
1347 case TargetOpcode::G_FLOG10:
1348 case TargetOpcode::G_FLOG:
1349 case TargetOpcode::G_FLOG2:
1350 case TargetOpcode::G_FEXP:
1351 case TargetOpcode::G_FEXP2:
1352 case TargetOpcode::G_FEXP10:
1353 case TargetOpcode::G_FCEIL:
1354 case TargetOpcode::G_FFLOOR:
1355 case TargetOpcode::G_FMINNUM:
1356 case TargetOpcode::G_FMAXNUM:
1357 case TargetOpcode::G_FMINIMUMNUM:
1358 case TargetOpcode::G_FMAXIMUMNUM:
1359 case TargetOpcode::G_FSQRT:
1360 case TargetOpcode::G_FRINT:
1361 case TargetOpcode::G_FNEARBYINT:
1362 case TargetOpcode::G_INTRINSIC_TRUNC:
1363 case TargetOpcode::G_INTRINSIC_ROUND:
1364 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1365 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1369 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1377 case TargetOpcode::G_FSINCOS: {
1378 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1382 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1387 case TargetOpcode::G_FMODF: {
1388 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1392 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1397 case TargetOpcode::G_LROUND:
1398 case TargetOpcode::G_LLROUND:
1399 case TargetOpcode::G_INTRINSIC_LRINT:
1400 case TargetOpcode::G_INTRINSIC_LLRINT: {
1401 LLT LLTy = MRI.getType(
MI.getOperand(1).getReg());
1405 Ctx, MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits());
1407 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1413 {{
MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &
MI);
1416 MI.eraseFromParent();
1419 case TargetOpcode::G_FPOWI:
1420 case TargetOpcode::G_FLDEXP: {
1421 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1425 Ctx, MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
1427 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1432 {
MI.getOperand(1).getReg(), HLTy, 0},
1433 {
MI.getOperand(2).getReg(), ITy, 1}};
1434 Args[1].Flags[0].setSExt();
1436 Libcall, {
MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &
MI);
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1445 if (!FromTy || !ToTy)
1452 case TargetOpcode::G_FCMP: {
1456 MI.eraseFromParent();
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1464 unsigned ToSize = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1468 FromTy, LocObserver);
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1487 case TargetOpcode::G_ATOMICRMW_XCHG:
1488 case TargetOpcode::G_ATOMICRMW_ADD:
1489 case TargetOpcode::G_ATOMICRMW_SUB:
1490 case TargetOpcode::G_ATOMICRMW_AND:
1491 case TargetOpcode::G_ATOMICRMW_OR:
1492 case TargetOpcode::G_ATOMICRMW_XOR:
1493 case TargetOpcode::G_ATOMIC_CMPXCHG:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1500 case TargetOpcode::G_BZERO:
1501 case TargetOpcode::G_MEMCPY:
1502 case TargetOpcode::G_MEMMOVE:
1503 case TargetOpcode::G_MEMSET: {
1508 MI.eraseFromParent();
1511 case TargetOpcode::G_GET_FPENV:
1512 case TargetOpcode::G_GET_FPMODE: {
1518 case TargetOpcode::G_SET_FPENV:
1519 case TargetOpcode::G_SET_FPMODE: {
1525 case TargetOpcode::G_RESET_FPENV:
1526 case TargetOpcode::G_RESET_FPMODE: {
1534 MI.eraseFromParent();
1541 uint64_t SizeOp0 = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1544 switch (
MI.getOpcode()) {
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1549 LLT DstTy = MRI.getType(DstReg);
1557 if (SizeOp0 % NarrowSize != 0) {
1562 MI.eraseFromParent();
1566 int NumParts = SizeOp0 / NarrowSize;
1569 for (
int i = 0; i < NumParts; ++i)
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1581 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1587 for (
int I = 0;
I != NumParts; ++
I) {
1588 unsigned Offset =
I * NarrowSize;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1597 if (LeftoverBits != 0) {
1601 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1605 insertParts(
MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1608 MI.eraseFromParent();
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1615 case TargetOpcode::G_TRUNC: {
1619 uint64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1621 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1625 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
1626 MIRBuilder.buildCopy(
MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1635 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1640 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1).getReg());
1642 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1644 MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1648 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1666 case TargetOpcode::G_EXTRACT:
1668 case TargetOpcode::G_INSERT:
1670 case TargetOpcode::G_LOAD: {
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1681 LoadMI.eraseFromParent();
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD:
1689 case TargetOpcode::G_FPEXTLOAD: {
1691 Register DstReg = LoadMI.getDstReg();
1692 Register PtrReg = LoadMI.getPointerReg();
1694 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1695 auto &MMO = LoadMI.getMMO();
1698 if (MemSize == NarrowSize) {
1700 }
else if (MemSize < NarrowSize) {
1701 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1702 }
else if (MemSize > NarrowSize) {
1714 LoadMI.eraseFromParent();
1717 case TargetOpcode::G_STORE: {
1720 Register SrcReg = StoreMI.getValueReg();
1721 LLT SrcTy = MRI.getType(SrcReg);
1722 if (SrcTy.isVector())
1725 int NumParts = SizeOp0 / NarrowSize;
1727 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1728 if (SrcTy.isVector() && LeftoverBits != 0)
1731 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1732 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1734 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1735 StoreMI.eraseFromParent();
1741 case TargetOpcode::G_FPTRUNCSTORE: {
1743 Register SrcReg = StoreMI.getValueReg();
1744 Register PtrReg = StoreMI.getPointerReg();
1746 auto &MMO = StoreMI.getMMO();
1748 if (MemSize > NarrowSize) {
1752 auto TmpReg =
MIRBuilder.buildFPTrunc(NarrowTy, SrcReg);
1753 if (MemSize == NarrowSize) {
1755 }
else if (MemSize < NarrowSize) {
1756 MIRBuilder.buildStoreInstr(TargetOpcode::G_FPTRUNCSTORE, TmpReg, PtrReg,
1760 StoreMI.eraseFromParent();
1763 case TargetOpcode::G_SELECT:
1765 case TargetOpcode::G_AND:
1766 case TargetOpcode::G_OR:
1767 case TargetOpcode::G_XOR: {
1779 case TargetOpcode::G_SHL:
1780 case TargetOpcode::G_LSHR:
1781 case TargetOpcode::G_ASHR:
1783 case TargetOpcode::G_CTLZ:
1784 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1785 case TargetOpcode::G_CTTZ:
1786 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1787 case TargetOpcode::G_CTLS:
1788 case TargetOpcode::G_CTPOP:
1790 switch (
MI.getOpcode()) {
1791 case TargetOpcode::G_CTLZ:
1792 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1794 case TargetOpcode::G_CTTZ:
1795 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1797 case TargetOpcode::G_CTPOP:
1799 case TargetOpcode::G_CTLS:
1809 case TargetOpcode::G_INTTOPTR:
1817 case TargetOpcode::G_PTRTOINT:
1825 case TargetOpcode::G_PHI: {
1828 if (SizeOp0 % NarrowSize != 0)
1831 unsigned NumParts = SizeOp0 / NarrowSize;
1835 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1843 for (
unsigned i = 0; i < NumParts; ++i) {
1844 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1846 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1847 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1848 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1851 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
1853 MI.eraseFromParent();
1856 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1857 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1861 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1867 case TargetOpcode::G_ICMP: {
1869 LLT SrcTy = MRI.getType(LHS);
1875 if (!
extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1881 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1882 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1888 LLT ResTy = MRI.getType(Dst);
1893 auto Zero =
MIRBuilder.buildConstant(NarrowTy, 0);
1895 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1896 auto LHS = std::get<0>(LHSAndRHS);
1897 auto RHS = std::get<1>(LHSAndRHS);
1898 auto Xor =
MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1905 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1906 auto LHS = std::get<0>(LHSAndRHS);
1907 auto RHS = std::get<1>(LHSAndRHS);
1908 auto Xor =
MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1909 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1910 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1911 TargetOpcode::G_ZEXT);
1918 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1919 auto Or =
MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1920 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1925 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1929 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1934 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1938 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[
I],
1941 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[
I],
1944 LHSPartRegs[
I], RHSPartRegs[
I]);
1945 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1951 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1960 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1964 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[
I],
1965 RHSLeftoverRegs[
I]);
1967 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[
I],
1968 RHSLeftoverRegs[
I]);
1971 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1972 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1978 MI.eraseFromParent();
1981 case TargetOpcode::G_FCMP:
1990 case TargetOpcode::G_SEXT_INREG: {
1994 int64_t SizeInBits =
MI.getOperand(2).getImm();
2003 auto TruncMIB =
MIRBuilder.buildTrunc(NarrowTy, MO1);
2004 MO1.
setReg(TruncMIB.getReg(0));
2007 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
2019 if (SizeOp0 % NarrowSize != 0)
2021 int NumParts = SizeOp0 / NarrowSize;
2029 for (
int i = 0; i < NumParts; ++i) {
2030 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2045 for (
int i = 0; i < NumParts; ++i) {
2048 PartialExtensionReg = DstRegs.
back();
2050 assert(PartialExtensionReg &&
2051 "Expected to visit partial extension before full");
2052 if (FullExtensionReg) {
2057 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2059 FullExtensionReg = DstRegs.
back();
2064 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2067 PartialExtensionReg = DstRegs.
back();
2073 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2074 MI.eraseFromParent();
2077 case TargetOpcode::G_BSWAP:
2078 case TargetOpcode::G_BITREVERSE: {
2079 if (SizeOp0 % NarrowSize != 0)
2084 unsigned NumParts = SizeOp0 / NarrowSize;
2085 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2088 for (
unsigned i = 0; i < NumParts; ++i) {
2089 auto DstPart =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
2090 {SrcRegs[NumParts - 1 - i]});
2094 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
2097 MI.eraseFromParent();
2100 case TargetOpcode::G_PTR_ADD:
2101 case TargetOpcode::G_PTRMASK: {
2109 case TargetOpcode::G_FPTOUI:
2110 case TargetOpcode::G_FPTOSI:
2111 case TargetOpcode::G_FPTOUI_SAT:
2112 case TargetOpcode::G_FPTOSI_SAT:
2114 case TargetOpcode::G_FPEXT:
2121 case TargetOpcode::G_FLDEXP:
2122 case TargetOpcode::G_STRICT_FLDEXP:
2124 case TargetOpcode::G_VSCALE: {
2126 LLT Ty = MRI.getType(Dst);
2130 auto VScaleBase =
MIRBuilder.buildVScale(NarrowTy, One);
2131 auto ZExt =
MIRBuilder.buildZExt(Ty, VScaleBase);
2132 auto C =
MIRBuilder.buildConstant(Ty, *
MI.getOperand(1).getCImm());
2135 MI.eraseFromParent();
2142 LLT Ty = MRI.getType(Val);
2148 if (Ty.isPointer()) {
2149 if (
DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2151 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2157 if (Ty.isPointerVector())
2158 NewVal =
MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2159 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2163 unsigned OpIdx,
unsigned ExtOpcode) {
2165 auto ExtB =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2166 MO.
setReg(ExtB.getReg(0));
2172 auto ExtB =
MIRBuilder.buildTrunc(NarrowTy, MO);
2173 MO.
setReg(ExtB.getReg(0));
2177 unsigned OpIdx,
unsigned TruncOpcode) {
2179 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2181 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2186 unsigned OpIdx,
unsigned ExtOpcode) {
2188 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2190 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2199 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2201 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2207 MO.
setReg(
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2217 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2224LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2229 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
2230 if (DstTy.isVector())
2235 const int SrcSize = SrcTy.getSizeInBits();
2237 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2239 unsigned NumOps =
MI.getNumOperands();
2240 unsigned NumSrc =
MI.getNumOperands() - 1;
2241 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2243 if (WideSize >= DstSize) {
2247 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
2248 const unsigned Offset = (
I - 1) * PartSize;
2261 ResultReg = NextResult;
2264 if (WideSize > DstSize)
2266 else if (DstTy.isPointer())
2269 MI.eraseFromParent();
2294 const int GCD = std::gcd(SrcSize, WideSize);
2304 if (GCD == SrcSize) {
2307 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2308 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2314 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2316 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
2320 const int PartsPerGCD = WideSize / GCD;
2324 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2326 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2333 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2335 auto FinalMerge =
MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2336 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2339 MI.eraseFromParent();
2344LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2349 int NumDst =
MI.getNumOperands() - 1;
2350 Register SrcReg =
MI.getOperand(NumDst).getReg();
2351 LLT SrcTy = MRI.getType(SrcReg);
2355 Register Dst0Reg =
MI.getOperand(0).getReg();
2356 LLT DstTy = MRI.getType(Dst0Reg);
2365 dbgs() <<
"Not casting non-integral address space integer\n");
2370 SrcReg =
MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2378 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2386 for (
int I = 1;
I != NumDst; ++
I) {
2387 auto ShiftAmt =
MIRBuilder.buildConstant(SrcTy, DstSize *
I);
2388 auto Shr =
MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2392 MI.eraseFromParent();
2403 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2407 WideSrc =
MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2410 auto Unmerge =
MIRBuilder.buildUnmerge(WideTy, WideSrc);
2428 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2433 if (PartsPerRemerge == 1) {
2436 for (
int I = 0;
I != NumUnmerge; ++
I) {
2437 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2439 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2440 int Idx =
I * PartsPerUnmerge + J;
2442 MIB.addDef(
MI.getOperand(Idx).getReg());
2445 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2449 MIB.addUse(Unmerge.getReg(
I));
2452 SmallVector<Register, 16> Parts;
2453 for (
int J = 0; J != NumUnmerge; ++J)
2454 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2457 for (
int I = 0;
I != NumDst; ++
I) {
2458 for (
int J = 0; J < PartsPerRemerge; ++J) {
2459 const int Idx =
I * PartsPerRemerge + J;
2463 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(
I).getReg(), RemergeParts);
2464 RemergeParts.
clear();
2468 MI.eraseFromParent();
2473LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2475 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2476 unsigned Offset =
MI.getOperand(2).getImm();
2479 if (SrcTy.
isVector() || DstTy.isVector())
2491 Src =
MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2495 if (DstTy.isPointer())
2502 MI.eraseFromParent();
2507 LLT ShiftTy = SrcTy;
2516 MI.eraseFromParent();
2547LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2549 if (TypeIdx != 0 || WideTy.
isVector())
2559LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2563 std::optional<Register> CarryIn;
2564 switch (
MI.getOpcode()) {
2567 case TargetOpcode::G_SADDO:
2568 Opcode = TargetOpcode::G_ADD;
2569 ExtOpcode = TargetOpcode::G_SEXT;
2571 case TargetOpcode::G_SSUBO:
2572 Opcode = TargetOpcode::G_SUB;
2573 ExtOpcode = TargetOpcode::G_SEXT;
2575 case TargetOpcode::G_UADDO:
2576 Opcode = TargetOpcode::G_ADD;
2577 ExtOpcode = TargetOpcode::G_ZEXT;
2579 case TargetOpcode::G_USUBO:
2580 Opcode = TargetOpcode::G_SUB;
2581 ExtOpcode = TargetOpcode::G_ZEXT;
2583 case TargetOpcode::G_SADDE:
2584 Opcode = TargetOpcode::G_UADDE;
2585 ExtOpcode = TargetOpcode::G_SEXT;
2586 CarryIn =
MI.getOperand(4).getReg();
2588 case TargetOpcode::G_SSUBE:
2589 Opcode = TargetOpcode::G_USUBE;
2590 ExtOpcode = TargetOpcode::G_SEXT;
2591 CarryIn =
MI.getOperand(4).getReg();
2593 case TargetOpcode::G_UADDE:
2594 Opcode = TargetOpcode::G_UADDE;
2595 ExtOpcode = TargetOpcode::G_ZEXT;
2596 CarryIn =
MI.getOperand(4).getReg();
2598 case TargetOpcode::G_USUBE:
2599 Opcode = TargetOpcode::G_USUBE;
2600 ExtOpcode = TargetOpcode::G_ZEXT;
2601 CarryIn =
MI.getOperand(4).getReg();
2617 auto LHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(2)});
2618 auto RHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(3)});
2622 LLT CarryOutTy = MRI.getType(
MI.getOperand(1).getReg());
2624 .buildInstr(Opcode, {WideTy, CarryOutTy},
2625 {LHSExt, RHSExt, *CarryIn})
2628 NewOp =
MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).
getReg(0);
2630 LLT OrigTy = MRI.getType(
MI.getOperand(0).getReg());
2631 auto TruncOp =
MIRBuilder.buildTrunc(OrigTy, NewOp);
2632 auto ExtOp =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2637 MI.eraseFromParent();
2642LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2644 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2645 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2646 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2647 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2648 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2661 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2668 auto ShiftK =
MIRBuilder.buildConstant(WideTy, SHLAmount);
2672 auto WideInst =
MIRBuilder.buildInstr(
MI.getOpcode(), {WideTy},
2673 {ShiftL, ShiftR},
MI.getFlags());
2678 :
MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2681 MI.eraseFromParent();
2686LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2695 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2697 LLT SrcTy = MRI.getType(
LHS);
2698 LLT OverflowTy = MRI.getType(OriginalOverflow);
2705 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2706 auto LeftOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
LHS});
2707 auto RightOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
RHS});
2714 WideMulCanOverflow ?
MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2716 MachineInstrBuilder Mulo;
2717 if (WideMulCanOverflow)
2718 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2719 {LeftOperand, RightOperand});
2721 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2726 MachineInstrBuilder ExtResult;
2733 ExtResult =
MIRBuilder.buildSExtInReg(WideTy,
Mul, SrcBitWidth);
2737 ExtResult =
MIRBuilder.buildZExtInReg(WideTy,
Mul, SrcBitWidth);
2740 if (WideMulCanOverflow) {
2748 MI.eraseFromParent();
2754 unsigned Opcode =
MI.getOpcode();
2758 case TargetOpcode::G_ATOMICRMW_XCHG:
2759 case TargetOpcode::G_ATOMICRMW_ADD:
2760 case TargetOpcode::G_ATOMICRMW_SUB:
2761 case TargetOpcode::G_ATOMICRMW_AND:
2762 case TargetOpcode::G_ATOMICRMW_OR:
2763 case TargetOpcode::G_ATOMICRMW_XOR:
2764 case TargetOpcode::G_ATOMICRMW_MIN:
2765 case TargetOpcode::G_ATOMICRMW_MAX:
2766 case TargetOpcode::G_ATOMICRMW_UMIN:
2767 case TargetOpcode::G_ATOMICRMW_UMAX:
2768 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2774 case TargetOpcode::G_ATOMIC_CMPXCHG:
2775 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2782 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2792 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2797 case TargetOpcode::G_EXTRACT:
2798 return widenScalarExtract(
MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_INSERT:
2800 return widenScalarInsert(
MI, TypeIdx, WideTy);
2801 case TargetOpcode::G_MERGE_VALUES:
2802 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2803 case TargetOpcode::G_UNMERGE_VALUES:
2804 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2805 case TargetOpcode::G_SADDO:
2806 case TargetOpcode::G_SSUBO:
2807 case TargetOpcode::G_UADDO:
2808 case TargetOpcode::G_USUBO:
2809 case TargetOpcode::G_SADDE:
2810 case TargetOpcode::G_SSUBE:
2811 case TargetOpcode::G_UADDE:
2812 case TargetOpcode::G_USUBE:
2813 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2814 case TargetOpcode::G_UMULO:
2815 case TargetOpcode::G_SMULO:
2816 return widenScalarMulo(
MI, TypeIdx, WideTy);
2817 case TargetOpcode::G_SADDSAT:
2818 case TargetOpcode::G_SSUBSAT:
2819 case TargetOpcode::G_SSHLSAT:
2820 case TargetOpcode::G_UADDSAT:
2821 case TargetOpcode::G_USUBSAT:
2822 case TargetOpcode::G_USHLSAT:
2823 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2824 case TargetOpcode::G_CTTZ:
2825 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2826 case TargetOpcode::G_CTLZ:
2827 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2828 case TargetOpcode::G_CTLS:
2829 case TargetOpcode::G_CTPOP: {
2842 case TargetOpcode::G_CTTZ:
2843 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2844 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2845 ExtOpc = TargetOpcode::G_ANYEXT;
2847 case TargetOpcode::G_CTLS:
2848 ExtOpc = TargetOpcode::G_SEXT;
2851 ExtOpc = TargetOpcode::G_ZEXT;
2854 auto MIBSrc =
MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2855 LLT CurTy = MRI.getType(SrcReg);
2856 unsigned NewOpc = Opcode;
2857 if (NewOpc == TargetOpcode::G_CTTZ) {
2864 WideTy, MIBSrc,
MIRBuilder.buildConstant(WideTy, TopBit));
2866 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2872 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2882 auto MIBNewOp =
MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2884 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2889 WideTy, MIBNewOp,
MIRBuilder.buildConstant(WideTy, SizeDiff),
2890 Opcode == TargetOpcode::G_CTLZ
2895 MIRBuilder.buildZExtOrTrunc(
MI.getOperand(0), MIBNewOp);
2896 MI.eraseFromParent();
2899 case TargetOpcode::G_BSWAP: {
2903 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2904 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2905 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2908 MI.getOperand(0).setReg(DstExt);
2912 LLT Ty = MRI.getType(DstReg);
2914 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2915 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2921 case TargetOpcode::G_BITREVERSE: {
2925 LLT Ty = MRI.getType(DstReg);
2928 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2930 MI.getOperand(0).setReg(DstExt);
2933 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, DiffBits);
2934 auto Shift =
MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2939 case TargetOpcode::G_FREEZE:
2940 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2947 case TargetOpcode::G_ABS:
2954 case TargetOpcode::G_ADD:
2955 case TargetOpcode::G_AND:
2956 case TargetOpcode::G_MUL:
2957 case TargetOpcode::G_OR:
2958 case TargetOpcode::G_XOR:
2959 case TargetOpcode::G_SUB:
2960 case TargetOpcode::G_SHUFFLE_VECTOR:
2971 case TargetOpcode::G_SBFX:
2972 case TargetOpcode::G_UBFX:
2986 case TargetOpcode::G_SHL:
3002 case TargetOpcode::G_ROTR:
3003 case TargetOpcode::G_ROTL:
3012 case TargetOpcode::G_SDIV:
3013 case TargetOpcode::G_SREM:
3014 case TargetOpcode::G_SMIN:
3015 case TargetOpcode::G_SMAX:
3016 case TargetOpcode::G_ABDS:
3024 case TargetOpcode::G_SDIVREM:
3034 case TargetOpcode::G_ASHR:
3035 case TargetOpcode::G_LSHR:
3039 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3040 : TargetOpcode::G_ZEXT;
3053 case TargetOpcode::G_UDIV:
3054 case TargetOpcode::G_UREM:
3055 case TargetOpcode::G_ABDU:
3062 case TargetOpcode::G_UDIVREM:
3071 case TargetOpcode::G_UMIN:
3072 case TargetOpcode::G_UMAX: {
3073 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3075 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3079 ? TargetOpcode::G_SEXT
3080 : TargetOpcode::G_ZEXT;
3090 case TargetOpcode::G_SELECT:
3100 bool IsVec = MRI.getType(
MI.getOperand(1).getReg()).isVector();
3107 case TargetOpcode::G_FPEXT:
3115 case TargetOpcode::G_FPTOSI:
3116 case TargetOpcode::G_FPTOUI:
3117 case TargetOpcode::G_INTRINSIC_LRINT:
3118 case TargetOpcode::G_INTRINSIC_LLRINT:
3119 case TargetOpcode::G_IS_FPCLASS:
3129 case TargetOpcode::G_SITOFP:
3139 case TargetOpcode::G_UITOFP:
3149 case TargetOpcode::G_FPTOSI_SAT:
3150 case TargetOpcode::G_FPTOUI_SAT:
3155 LLT Ty = MRI.getType(OldDst);
3156 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3158 MI.getOperand(0).setReg(ExtReg);
3159 uint64_t ShortBits = Ty.getScalarSizeInBits();
3162 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3173 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3174 NewDst =
MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3182 NewDst =
MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3190 case TargetOpcode::G_LOAD:
3191 case TargetOpcode::G_SEXTLOAD:
3192 case TargetOpcode::G_ZEXTLOAD:
3193 case TargetOpcode::G_FPEXTLOAD:
3199 case TargetOpcode::G_STORE: {
3203 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3204 assert(!Ty.isPointerOrPointerVector() &&
"Can't widen type");
3205 if (!Ty.isScalar()) {
3213 MI.setMemRefs(MF, {NewMMO});
3220 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3221 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3227 case TargetOpcode::G_FPTRUNCSTORE:
3234 case TargetOpcode::G_CONSTANT: {
3237 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3238 MRI.getType(
MI.getOperand(0).getReg()));
3239 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3240 ExtOpc == TargetOpcode::G_ANYEXT) &&
3243 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3247 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3253 case TargetOpcode::G_FCONSTANT: {
3259 auto IntCst =
MIRBuilder.buildConstant(
MI.getOperand(0).getReg(), Val);
3261 MI.eraseFromParent();
3264 case TargetOpcode::G_IMPLICIT_DEF: {
3270 case TargetOpcode::G_BRCOND:
3276 case TargetOpcode::G_FCMP:
3287 case TargetOpcode::G_ICMP:
3292 LLT SrcTy = MRI.getType(
MI.getOperand(2).getReg());
3296 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3297 unsigned ExtOpcode =
3301 ? TargetOpcode::G_SEXT
3302 : TargetOpcode::G_ZEXT;
3309 case TargetOpcode::G_PTR_ADD:
3310 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3316 case TargetOpcode::G_PHI: {
3317 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3320 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3332 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3335 LLT VecTy = MRI.getType(VecReg);
3339 TargetOpcode::G_ANYEXT);
3353 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3369 LLT VecTy = MRI.getType(VecReg);
3388 case TargetOpcode::G_FADD:
3389 case TargetOpcode::G_FMUL:
3390 case TargetOpcode::G_FSUB:
3391 case TargetOpcode::G_FMA:
3392 case TargetOpcode::G_FMAD:
3393 case TargetOpcode::G_FNEG:
3394 case TargetOpcode::G_FABS:
3395 case TargetOpcode::G_FCANONICALIZE:
3396 case TargetOpcode::G_FMINNUM:
3397 case TargetOpcode::G_FMAXNUM:
3398 case TargetOpcode::G_FMINNUM_IEEE:
3399 case TargetOpcode::G_FMAXNUM_IEEE:
3400 case TargetOpcode::G_FMINIMUM:
3401 case TargetOpcode::G_FMAXIMUM:
3402 case TargetOpcode::G_FMINIMUMNUM:
3403 case TargetOpcode::G_FMAXIMUMNUM:
3404 case TargetOpcode::G_FDIV:
3405 case TargetOpcode::G_FREM:
3406 case TargetOpcode::G_FCEIL:
3407 case TargetOpcode::G_FFLOOR:
3408 case TargetOpcode::G_FCOS:
3409 case TargetOpcode::G_FSIN:
3410 case TargetOpcode::G_FTAN:
3411 case TargetOpcode::G_FACOS:
3412 case TargetOpcode::G_FASIN:
3413 case TargetOpcode::G_FATAN:
3414 case TargetOpcode::G_FATAN2:
3415 case TargetOpcode::G_FCOSH:
3416 case TargetOpcode::G_FSINH:
3417 case TargetOpcode::G_FTANH:
3418 case TargetOpcode::G_FLOG10:
3419 case TargetOpcode::G_FLOG:
3420 case TargetOpcode::G_FLOG2:
3421 case TargetOpcode::G_FRINT:
3422 case TargetOpcode::G_FNEARBYINT:
3423 case TargetOpcode::G_FSQRT:
3424 case TargetOpcode::G_FEXP:
3425 case TargetOpcode::G_FEXP2:
3426 case TargetOpcode::G_FEXP10:
3427 case TargetOpcode::G_FPOW:
3428 case TargetOpcode::G_INTRINSIC_TRUNC:
3429 case TargetOpcode::G_INTRINSIC_ROUND:
3430 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3434 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3440 case TargetOpcode::G_FMODF: {
3450 case TargetOpcode::G_FPOWI:
3451 case TargetOpcode::G_FLDEXP:
3452 case TargetOpcode::G_STRICT_FLDEXP: {
3454 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3475 case TargetOpcode::G_FFREXP: {
3488 case TargetOpcode::G_LROUND:
3489 case TargetOpcode::G_LLROUND:
3500 case TargetOpcode::G_INTTOPTR:
3508 case TargetOpcode::G_PTRTOINT:
3516 case TargetOpcode::G_BUILD_VECTOR: {
3520 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3526 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3534 case TargetOpcode::G_SEXT_INREG:
3543 case TargetOpcode::G_PTRMASK: {
3551 case TargetOpcode::G_VECREDUCE_ADD: {
3560 case TargetOpcode::G_VECREDUCE_FADD:
3561 case TargetOpcode::G_VECREDUCE_FMUL:
3562 case TargetOpcode::G_VECREDUCE_FMIN:
3563 case TargetOpcode::G_VECREDUCE_FMAX:
3564 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3565 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3570 LLT VecTy = MRI.getType(VecReg);
3577 case TargetOpcode::G_VSCALE: {
3584 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3589 case TargetOpcode::G_SPLAT_VECTOR: {
3598 case TargetOpcode::G_INSERT_SUBVECTOR: {
3606 LLT SubVecTy = MRI.getType(SubVec);
3610 auto BigZExt =
MIRBuilder.buildZExt(WideTy, BigVec);
3611 auto SubZExt =
MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3612 auto WideInsert =
MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3616 auto SplatZero =
MIRBuilder.buildSplatVector(
3621 MI.eraseFromParent();
3630 auto Unmerge =
B.buildUnmerge(Ty, Src);
3631 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
3640 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3654 MIRBuilder.
buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3663 MI.eraseFromParent();
3674 MI.eraseFromParent();
3681 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3682 if (SrcTy.isVector()) {
3686 if (DstTy.isVector()) {
3687 int NumDstElt = DstTy.getNumElements();
3688 int NumSrcElt = SrcTy.getNumElements();
3691 LLT DstCastTy = DstEltTy;
3692 LLT SrcPartTy = SrcEltTy;
3696 if (NumSrcElt < NumDstElt) {
3707 SrcPartTy = SrcEltTy;
3708 }
else if (NumSrcElt > NumDstElt) {
3720 DstCastTy = DstEltTy;
3725 SrcReg =
MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3729 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3730 MI.eraseFromParent();
3734 if (DstTy.isVector()) {
3737 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3738 MI.eraseFromParent();
3754 unsigned NewEltSize,
3755 unsigned OldEltSize) {
3756 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3757 LLT IdxTy =
B.getMRI()->getType(Idx);
3760 auto OffsetMask =
B.buildConstant(
3762 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
3763 return B.buildShl(IdxTy, OffsetIdx,
3764 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3779 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] =
MI.getFirst3RegLLTs();
3783 unsigned OldNumElts = SrcVecTy.getNumElements();
3790 if (NewNumElts > OldNumElts) {
3801 if (NewNumElts % OldNumElts != 0)
3805 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3809 auto NewEltsPerOldEltK =
MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3812 auto NewBaseIdx =
MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3814 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3815 auto IdxOffset =
MIRBuilder.buildConstant(IdxTy,
I);
3816 auto TmpIdx =
MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3817 auto Elt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3818 NewOps[
I] = Elt.getReg(0);
3821 auto NewVec =
MIRBuilder.buildBuildVector(MidTy, NewOps);
3823 MI.eraseFromParent();
3827 if (NewNumElts < OldNumElts) {
3828 if (NewEltSize % OldEltSize != 0)
3850 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3851 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3854 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3858 WideElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3859 ScaledIdx).getReg(0);
3867 auto ExtractedBits =
MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3869 MI.eraseFromParent();
3883 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3884 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3885 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3886 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3889 auto EltMask =
B.buildConstant(
3893 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3894 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3897 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3901 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3915 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3916 MI.getFirst4RegLLTs();
3928 if (NewNumElts < OldNumElts) {
3929 if (NewEltSize % OldEltSize != 0)
3938 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3939 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3942 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3946 ExtractedElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3947 ScaledIdx).getReg(0);
3957 InsertedElt =
MIRBuilder.buildInsertVectorElement(
3958 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3962 MI.eraseFromParent();
3992 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3996 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3997 return UnableToLegalize;
4002 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
4004 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
4013 MI.eraseFromParent();
4031 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
4032 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4042 auto Inp1 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4043 auto Inp2 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4045 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4046 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4048 MI.eraseFromParent();
4078 LLT DstTy = MRI.getType(Dst);
4079 LLT SrcTy = MRI.getType(Src);
4085 if (DstTy == CastTy)
4093 if (CastEltSize < DstEltSize)
4096 auto AdjustAmt = CastEltSize / DstEltSize;
4097 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4098 SrcTyMinElts % AdjustAmt != 0)
4103 auto CastVec =
MIRBuilder.buildBitcast(SrcTy, Src);
4104 auto PromotedES =
MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4107 ES->eraseFromParent();
4142 LLT DstTy = MRI.getType(Dst);
4143 LLT BigVecTy = MRI.getType(BigVec);
4144 LLT SubVecTy = MRI.getType(SubVec);
4146 if (DstTy == CastTy)
4161 if (CastEltSize < DstEltSize)
4164 auto AdjustAmt = CastEltSize / DstEltSize;
4165 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4166 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4172 auto CastBigVec =
MIRBuilder.buildBitcast(BigVecTy, BigVec);
4173 auto CastSubVec =
MIRBuilder.buildBitcast(SubVecTy, SubVec);
4175 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4178 ES->eraseFromParent();
4186 LLT DstTy = MRI.getType(DstReg);
4196 if (MemSizeInBits != MemStoreSizeInBits) {
4213 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4217 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4218 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4220 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4223 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4225 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4228 if (DstTy != LoadTy)
4236 if (
MIRBuilder.getDataLayout().isBigEndian())
4254 uint64_t LargeSplitSize, SmallSplitSize;
4259 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4266 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4269 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4280 if (Alignment.
value() * 8 > MemSizeInBits &&
4285 auto NewLoad =
MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4302 LLT PtrTy = MRI.getType(PtrReg);
4315 auto LargeLoad =
MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4318 auto OffsetCst =
MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
4319 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4320 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4322 SmallPtr, *SmallMMO);
4324 auto ShiftAmt =
MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4325 auto Shift =
MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4327 if (AnyExtTy == DstTy)
4328 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4330 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4334 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4354 LLT SrcTy = MRI.getType(SrcReg);
4362 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4368 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4370 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4374 auto ZextInReg =
MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4378 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4393 uint64_t LargeSplitSize, SmallSplitSize;
4400 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4403 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4412 if (SrcTy.isPointer()) {
4414 SrcReg =
MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4417 auto ExtVal =
MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4420 auto ShiftAmt =
MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4421 auto SmallVal =
MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4424 LLT PtrTy = MRI.getType(PtrReg);
4426 LargeSplitSize / 8);
4427 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4433 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4434 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4443 LLT SrcTy = MRI.getType(SrcReg);
4449 assert(SrcTy.isVector() &&
"Expect a vector store type");
4456 auto CurrVal =
MIRBuilder.buildConstant(IntTy, 0);
4460 auto Elt =
MIRBuilder.buildExtractVectorElement(
4461 SrcTy.getElementType(), SrcReg,
MIRBuilder.buildConstant(IdxTy,
I));
4462 auto Trunc =
MIRBuilder.buildTrunc(MemScalarTy, Elt);
4463 auto ZExt =
MIRBuilder.buildZExt(IntTy, Trunc);
4469 auto Shifted =
MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4470 CurrVal =
MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4474 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4485 switch (
MI.getOpcode()) {
4486 case TargetOpcode::G_LOAD: {
4504 case TargetOpcode::G_STORE: {
4520 case TargetOpcode::G_SELECT: {
4524 if (MRI.getType(
MI.getOperand(1).getReg()).isVector()) {
4526 dbgs() <<
"bitcast action not implemented for vector select\n");
4537 case TargetOpcode::G_AND:
4538 case TargetOpcode::G_OR:
4539 case TargetOpcode::G_XOR: {
4547 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4549 case TargetOpcode::G_INSERT_VECTOR_ELT:
4551 case TargetOpcode::G_CONCAT_VECTORS:
4553 case TargetOpcode::G_SHUFFLE_VECTOR:
4555 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4557 case TargetOpcode::G_INSERT_SUBVECTOR:
4565void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4575 switch(
MI.getOpcode()) {
4578 case TargetOpcode::G_FCONSTANT:
4580 case TargetOpcode::G_BITCAST:
4582 case TargetOpcode::G_SREM:
4583 case TargetOpcode::G_UREM: {
4584 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4586 MIRBuilder.buildInstr(
MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4587 {MI.getOperand(1), MI.getOperand(2)});
4589 auto Prod =
MIRBuilder.buildMul(Ty, Quot,
MI.getOperand(2));
4591 MI.eraseFromParent();
4594 case TargetOpcode::G_SADDO:
4595 case TargetOpcode::G_SSUBO:
4597 case TargetOpcode::G_SADDE:
4599 case TargetOpcode::G_SSUBE:
4601 case TargetOpcode::G_UMULH:
4602 case TargetOpcode::G_SMULH:
4604 case TargetOpcode::G_SMULO:
4605 case TargetOpcode::G_UMULO: {
4608 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4609 LLT Ty = MRI.getType(Res);
4611 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4612 ? TargetOpcode::G_SMULH
4613 : TargetOpcode::G_UMULH;
4617 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4618 MI.removeOperand(1);
4621 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4629 if (Opcode == TargetOpcode::G_SMULH) {
4630 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4631 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4638 case TargetOpcode::G_FNEG: {
4639 auto [Res, ResTy, SubByReg, SubByRegTy] =
MI.getFirst2RegLLTs();
4642 Register CastedSubByReg = SubByReg;
4644 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4645 !SubByRegTy.getScalarType().isInteger()) {
4646 auto BitcastDst = SubByRegTy.changeElementType(
4648 CastedSubByReg =
MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4654 if (ResTy != TyInt) {
4656 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4659 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4661 MI.eraseFromParent();
4664 case TargetOpcode::G_FSUB:
4665 case TargetOpcode::G_STRICT_FSUB: {
4666 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4667 LLT Ty = MRI.getType(Res);
4672 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4673 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4677 MI.eraseFromParent();
4680 case TargetOpcode::G_FMAD:
4682 case TargetOpcode::G_FFLOOR:
4684 case TargetOpcode::G_LROUND:
4685 case TargetOpcode::G_LLROUND: {
4688 LLT SrcTy = MRI.getType(SrcReg);
4689 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4692 MI.eraseFromParent();
4695 case TargetOpcode::G_INTRINSIC_ROUND:
4697 case TargetOpcode::G_FRINT: {
4700 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4703 case TargetOpcode::G_INTRINSIC_LRINT:
4704 case TargetOpcode::G_INTRINSIC_LLRINT: {
4707 LLT SrcTy = MRI.getType(SrcReg);
4709 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4711 MI.eraseFromParent();
4714 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4715 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4716 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4717 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4718 **
MI.memoperands_begin());
4720 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4721 MI.eraseFromParent();
4724 case TargetOpcode::G_LOAD:
4725 case TargetOpcode::G_SEXTLOAD:
4726 case TargetOpcode::G_ZEXTLOAD:
4728 case TargetOpcode::G_STORE:
4730 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4731 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4732 case TargetOpcode::G_CTLZ:
4733 case TargetOpcode::G_CTTZ:
4734 case TargetOpcode::G_CTPOP:
4735 case TargetOpcode::G_CTLS:
4738 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4740 Register NewRes = MRI.cloneVirtualRegister(Res);
4747 MI.eraseFromParent();
4751 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4752 const LLT CondTy = MRI.getType(CarryOut);
4753 const LLT Ty = MRI.getType(Res);
4755 Register NewRes = MRI.cloneVirtualRegister(Res);
4758 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4764 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4765 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4772 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4777 MI.eraseFromParent();
4781 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4786 MI.eraseFromParent();
4790 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4791 const LLT CondTy = MRI.getType(BorrowOut);
4792 const LLT Ty = MRI.getType(Res);
4795 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4801 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4802 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4809 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4810 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4812 MI.eraseFromParent();
4852 case G_MERGE_VALUES:
4854 case G_UNMERGE_VALUES:
4856 case TargetOpcode::G_SEXT_INREG: {
4857 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4858 int64_t SizeInBits =
MI.getOperand(2).getImm();
4860 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4861 LLT DstTy = MRI.getType(DstReg);
4862 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4865 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4866 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4867 MI.eraseFromParent();
4870 case G_EXTRACT_VECTOR_ELT:
4871 case G_INSERT_VECTOR_ELT:
4873 case G_SHUFFLE_VECTOR:
4875 case G_VECTOR_COMPRESS:
4877 case G_DYN_STACKALLOC:
4881 case G_STACKRESTORE:
4891 case G_READ_REGISTER:
4892 case G_WRITE_REGISTER:
4899 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4900 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4906 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4911 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4922 bool IsSigned =
MI.getOpcode() == G_ABDS;
4923 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4924 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4925 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4949 case G_MEMCPY_INLINE:
4950 return lowerMemcpyInline(
MI);
4961 case G_ATOMICRMW_SUB: {
4962 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
4963 const LLT ValTy = MRI.getType(Val);
4967 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4968 MI.eraseFromParent();
4991 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4995 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
5001 Align StackTypeAlign =
5008 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
5009 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
5014 LLT IdxTy =
B.getMRI()->getType(IdxReg);
5026 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
5029 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
5040 "Converting bits to bytes lost precision");
5046 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5047 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
5049 if (IdxTy != MRI.getType(Index))
5050 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5055 LLT PtrTy = MRI.getType(VecPtr);
5056 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
5064 std::initializer_list<unsigned> NonVecOpIndices) {
5065 if (
MI.getNumMemOperands() != 0)
5082 if (!Ty.isVector()) {
5088 if (Ty.getNumElements() != NumElts)
5103 assert(Ty.isVector() &&
"Expected vector type");
5105 int NumParts, NumLeftover;
5106 std::tie(NumParts, NumLeftover) =
5109 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5110 for (
int i = 0; i < NumParts; ++i) {
5115 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5124 for (
unsigned i = 0; i <
N; ++i) {
5126 Ops.push_back(
Op.getReg());
5127 else if (
Op.isImm())
5128 Ops.push_back(
Op.getImm());
5129 else if (
Op.isPredicate())
5151 std::initializer_list<unsigned> NonVecOpIndices) {
5153 "Non-compatible opcode or not specified non-vector operands");
5154 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5156 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5157 unsigned NumDefs =
MI.getNumDefs();
5165 for (
unsigned i = 0; i < NumDefs; ++i) {
5166 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5174 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5175 ++UseIdx, ++UseNo) {
5178 MI.getOperand(UseIdx));
5187 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5191 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5193 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5194 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5197 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5198 Uses.push_back(InputOpsPieces[InputNo][i]);
5201 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5202 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5207 for (
unsigned i = 0; i < NumDefs; ++i)
5208 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5210 for (
unsigned i = 0; i < NumDefs; ++i)
5211 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5214 MI.eraseFromParent();
5221 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5223 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5224 unsigned NumDefs =
MI.getNumDefs();
5228 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5233 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5234 UseIdx += 2, ++UseNo) {
5242 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5244 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5245 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5247 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5250 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5251 Phi.addUse(InputOpsPieces[j][i]);
5252 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5262 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5264 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5267 MI.eraseFromParent();
5275 const int NumDst =
MI.getNumOperands() - 1;
5276 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5277 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5278 LLT SrcTy = MRI.getType(SrcReg);
5280 if (TypeIdx != 1 || NarrowTy == DstTy)
5287 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5290 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5304 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5305 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5306 const int PartsPerUnmerge = NumDst / NumUnmerge;
5308 for (
int I = 0;
I != NumUnmerge; ++
I) {
5309 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5311 for (
int J = 0; J != PartsPerUnmerge; ++J)
5312 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5313 MIB.addUse(Unmerge.getReg(
I));
5316 MI.eraseFromParent();
5323 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5327 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5329 if (NarrowTy == SrcTy)
5337 assert(SrcTy.isVector() &&
"Expected vector types");
5339 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5353 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5354 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5355 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5361 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5362 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5363 ++i,
Offset += NumNarrowTyElts) {
5366 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5369 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5370 MI.eraseFromParent();
5374 assert(TypeIdx == 0 &&
"Bad type index");
5375 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5390 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5391 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5393 for (
unsigned i = 0; i < NumParts; ++i) {
5395 for (
unsigned j = 0; j < NumElts; ++j)
5396 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5398 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5401 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5402 MI.eraseFromParent();
5410 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5412 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5414 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5416 InsertVal =
MI.getOperand(2).getReg();
5418 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5419 LLT VecTy = MRI.getType(SrcVec);
5425 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5429 MI.eraseFromParent();
5438 SplitPieces[IdxVal] = InsertVal;
5439 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5441 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5445 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5448 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5449 TargetOpcode::G_ANYEXT);
5453 LLT IdxTy = MRI.getType(Idx);
5454 int64_t PartIdx = IdxVal / NewNumElts;
5456 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5459 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5462 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5463 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5464 VecParts[PartIdx] = InsertPart.getReg(0);
5468 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5470 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5474 MI.eraseFromParent();
5494 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5506 LLT ValTy = MRI.getType(ValReg);
5515 int NumLeftover = -1;
5521 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5523 NumParts = NarrowRegs.
size();
5524 NumLeftover = NarrowLeftoverRegs.
size();
5531 LLT PtrTy = MRI.getType(AddrReg);
5541 auto MMO = LdStMI.
getMMO();
5543 unsigned NumParts,
unsigned Offset) ->
unsigned {
5546 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5548 unsigned ByteOffset =
Offset / 8;
5551 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5558 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5559 ValRegs.push_back(Dst);
5560 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5562 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5571 unsigned HandledOffset =
5572 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5576 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5579 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5580 LeftoverTy, NarrowLeftoverRegs);
5594 switch (
MI.getOpcode()) {
5595 case G_IMPLICIT_DEF:
5611 case G_FCANONICALIZE:
5628 case G_INTRINSIC_LRINT:
5629 case G_INTRINSIC_LLRINT:
5630 case G_INTRINSIC_ROUND:
5631 case G_INTRINSIC_ROUNDEVEN:
5634 case G_INTRINSIC_TRUNC:
5662 case G_FMINNUM_IEEE:
5663 case G_FMAXNUM_IEEE:
5685 case G_CTLZ_ZERO_UNDEF:
5687 case G_CTTZ_ZERO_UNDEF:
5704 case G_ADDRSPACE_CAST:
5717 case G_STRICT_FLDEXP:
5719 case G_TRUNC_SSAT_S:
5720 case G_TRUNC_SSAT_U:
5721 case G_TRUNC_USAT_U:
5729 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5734 case G_UNMERGE_VALUES:
5736 case G_BUILD_VECTOR:
5737 assert(TypeIdx == 0 &&
"not a vector type index");
5739 case G_CONCAT_VECTORS:
5743 case G_EXTRACT_VECTOR_ELT:
5744 case G_INSERT_VECTOR_ELT:
5753 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5754 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5756 case G_SHUFFLE_VECTOR:
5762 case G_INTRINSIC_FPTRUNC_ROUND:
5772 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5773 "Not a bitcast operation");
5778 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5780 unsigned NewElemCount =
5783 if (NewElemCount == 1) {
5786 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5793 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5802 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5803 MI.eraseFromParent();
5809 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5813 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5814 MI.getFirst3RegLLTs();
5817 if (DstTy != Src1Ty)
5819 if (DstTy != Src2Ty)
5834 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5850 unsigned InputUsed[2] = {-1U, -1U};
5851 unsigned FirstMaskIdx =
High * NewElts;
5852 bool UseBuildVector =
false;
5853 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5855 int Idx = Mask[FirstMaskIdx + MaskOffset];
5860 if (
Input >= std::size(Inputs)) {
5867 Idx -=
Input * NewElts;
5871 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5872 if (InputUsed[OpNo] ==
Input) {
5875 }
else if (InputUsed[OpNo] == -1U) {
5877 InputUsed[OpNo] =
Input;
5882 if (OpNo >= std::size(InputUsed)) {
5885 UseBuildVector =
true;
5890 Ops.push_back(Idx + OpNo * NewElts);
5893 if (UseBuildVector) {
5898 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5900 int Idx = Mask[FirstMaskIdx + MaskOffset];
5905 if (
Input >= std::size(Inputs)) {
5912 Idx -=
Input * NewElts;
5916 .buildExtractVectorElement(
5917 EltTy, Inputs[
Input],
5923 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5924 }
else if (InputUsed[0] == -1U) {
5926 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5927 }
else if (NewElts == 1) {
5928 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5930 Register Op0 = Inputs[InputUsed[0]];
5934 : Inputs[InputUsed[1]];
5936 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5943 MI.eraseFromParent();
5956 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5962 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5965 const unsigned NumParts =
5967 : SrcTy.getNumElements();
5971 if (DstTy != NarrowTy)
5977 unsigned NumPartsLeft = NumParts;
5978 while (NumPartsLeft > 1) {
5979 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5982 .buildInstr(ScalarOpc, {NarrowTy},
5983 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5986 SplitSrcs = PartialResults;
5987 PartialResults.
clear();
5988 NumPartsLeft = SplitSrcs.
size();
5992 MI.eraseFromParent();
5997 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
5998 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
6001 MI.eraseFromParent();
6005 for (
unsigned Part = 0; Part < NumParts; ++Part) {
6007 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
6015 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
6018 Register Acc = PartialReductions[0];
6019 for (
unsigned Part = 1; Part < NumParts; ++Part) {
6020 if (Part == NumParts - 1) {
6022 {Acc, PartialReductions[Part]});
6025 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
6029 MI.eraseFromParent();
6035 unsigned int TypeIdx,
6037 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6038 MI.getFirst3RegLLTs();
6039 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6043 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6044 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6045 "Unexpected vecreduce opcode");
6046 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6047 ? TargetOpcode::G_FADD
6048 : TargetOpcode::G_FMUL;
6051 unsigned NumParts = SrcTy.getNumElements();
6054 for (
unsigned i = 0; i < NumParts; i++)
6055 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6059 MI.eraseFromParent();
6066 unsigned ScalarOpc) {
6074 while (SplitSrcs.
size() > 1) {
6076 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6084 SplitSrcs = std::move(PartialRdxs);
6088 MI.getOperand(1).setReg(SplitSrcs[0]);
6095 const LLT HalfTy,
const LLT AmtTy) {
6097 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6098 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6102 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6103 MI.eraseFromParent();
6109 unsigned VTBits = 2 * NVTBits;
6112 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6113 if (Amt.
ugt(VTBits)) {
6115 }
else if (Amt.
ugt(NVTBits)) {
6118 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6119 }
else if (Amt == NVTBits) {
6127 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6130 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6131 if (Amt.
ugt(VTBits)) {
6133 }
else if (Amt.
ugt(NVTBits)) {
6135 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6137 }
else if (Amt == NVTBits) {
6141 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6143 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6145 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6151 if (Amt.
ugt(VTBits)) {
6153 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6154 }
else if (Amt.
ugt(NVTBits)) {
6156 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6158 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6159 }
else if (Amt == NVTBits) {
6162 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6164 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6166 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6168 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6175 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6176 MI.eraseFromParent();
6192 LLT DstTy = MRI.getType(DstReg);
6197 LLT ShiftAmtTy = MRI.getType(Amt);
6199 if (DstEltSize % 2 != 0)
6215 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6226 const unsigned NewBitSize = DstEltSize / 2;
6238 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6240 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6241 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6244 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6245 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6247 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6252 switch (
MI.getOpcode()) {
6253 case TargetOpcode::G_SHL: {
6255 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6257 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6258 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6259 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6262 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6263 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6265 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6267 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6269 ResultRegs[0] =
Lo.getReg(0);
6270 ResultRegs[1] =
Hi.getReg(0);
6273 case TargetOpcode::G_LSHR:
6274 case TargetOpcode::G_ASHR: {
6276 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6278 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6279 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6280 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6284 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6287 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6288 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6290 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6294 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6296 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6298 ResultRegs[0] =
Lo.getReg(0);
6299 ResultRegs[1] =
Hi.getReg(0);
6306 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6307 MI.eraseFromParent();
6316 LLT TargetTy,
LLT ShiftAmtTy) {
6319 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6321 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6322 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6323 const bool NeedsInterWordShift = ShiftBits != 0;
6326 case TargetOpcode::G_SHL: {
6329 if (PartIdx < ShiftWords)
6332 unsigned SrcIdx = PartIdx - ShiftWords;
6333 if (!NeedsInterWordShift)
6334 return SrcParts[SrcIdx];
6339 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6343 return Hi.getReg(0);
6346 case TargetOpcode::G_LSHR: {
6347 unsigned SrcIdx = PartIdx + ShiftWords;
6348 if (SrcIdx >= NumParts)
6350 if (!NeedsInterWordShift)
6351 return SrcParts[SrcIdx];
6355 if (SrcIdx + 1 < NumParts) {
6356 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6360 return Lo.getReg(0);
6363 case TargetOpcode::G_ASHR: {
6365 unsigned SrcIdx = PartIdx + ShiftWords;
6366 if (SrcIdx >= NumParts)
6368 if (!NeedsInterWordShift)
6369 return SrcParts[SrcIdx];
6374 (SrcIdx == NumParts - 1)
6378 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6400 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6401 ?
static_cast<unsigned>(TargetOpcode::G_LSHR)
6406 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6415 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6416 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6418 auto IsZeroBitShift =
6426 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6427 : TargetOpcode::G_SHL;
6430 auto TargetBitsConst =
6432 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6437 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6442 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6444 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6448 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6461 LLT DstTy = MRI.getType(DstReg);
6465 const unsigned NumParts = DstBits / TargetBits;
6467 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6477 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6478 MI.eraseFromParent();
6483 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6484 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6490 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6494 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6497 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6498 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6502 for (
unsigned I = 0;
I < NumParts; ++
I)
6504 Params, TargetTy, ShiftAmtTy);
6506 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6507 MI.eraseFromParent();
6516 LLT DstTy = MRI.getType(DstReg);
6517 LLT ShiftAmtTy = MRI.getType(AmtReg);
6521 const unsigned NumParts = DstBits / TargetBits;
6523 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6540 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6552 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6553 auto TargetBitsLog2Const =
6554 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6555 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6558 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6560 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6568 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6569 auto TargetBitsMinusOneConst =
6570 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6572 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6573 TargetBitsMinusOneConst)
6576 FillValue = ZeroReg;
6584 for (
unsigned I = 0;
I < NumParts; ++
I) {
6586 Register InBoundsResult = FillValue;
6596 for (
unsigned K = 0; K < NumParts; ++K) {
6597 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6599 WordShift, WordShiftKConst);
6611 switch (
MI.getOpcode()) {
6612 case TargetOpcode::G_SHL:
6613 MainSrcIdx = (int)
I - (
int)K;
6614 CarrySrcIdx = MainSrcIdx - 1;
6616 case TargetOpcode::G_LSHR:
6617 case TargetOpcode::G_ASHR:
6618 MainSrcIdx = (int)
I + (
int)K;
6619 CarrySrcIdx = MainSrcIdx + 1;
6627 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6628 Register MainOp = SrcParts[MainSrcIdx];
6632 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6633 CarryOp = SrcParts[CarrySrcIdx];
6634 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6635 CarrySrcIdx >= (
int)NumParts)
6636 CarryOp = FillValue;
6642 ResultForK = FillValue;
6648 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6655 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6659 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6660 MI.eraseFromParent();
6667 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6670 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6685 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6690 "getNeutralElementForVecReduce called with invalid opcode!");
6691 case TargetOpcode::G_VECREDUCE_ADD:
6692 case TargetOpcode::G_VECREDUCE_OR:
6693 case TargetOpcode::G_VECREDUCE_XOR:
6694 case TargetOpcode::G_VECREDUCE_UMAX:
6696 case TargetOpcode::G_VECREDUCE_MUL:
6698 case TargetOpcode::G_VECREDUCE_AND:
6699 case TargetOpcode::G_VECREDUCE_UMIN:
6702 case TargetOpcode::G_VECREDUCE_SMAX:
6705 case TargetOpcode::G_VECREDUCE_SMIN:
6708 case TargetOpcode::G_VECREDUCE_FADD:
6710 case TargetOpcode::G_VECREDUCE_FMUL:
6712 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6713 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6714 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6715 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6723 unsigned Opc =
MI.getOpcode();
6725 case TargetOpcode::G_IMPLICIT_DEF:
6726 case TargetOpcode::G_LOAD: {
6734 case TargetOpcode::G_STORE:
6741 case TargetOpcode::G_AND:
6742 case TargetOpcode::G_OR:
6743 case TargetOpcode::G_XOR:
6744 case TargetOpcode::G_ADD:
6745 case TargetOpcode::G_SUB:
6746 case TargetOpcode::G_MUL:
6747 case TargetOpcode::G_FADD:
6748 case TargetOpcode::G_FSUB:
6749 case TargetOpcode::G_FMUL:
6750 case TargetOpcode::G_FDIV:
6751 case TargetOpcode::G_FCOPYSIGN:
6752 case TargetOpcode::G_UADDSAT:
6753 case TargetOpcode::G_USUBSAT:
6754 case TargetOpcode::G_SADDSAT:
6755 case TargetOpcode::G_SSUBSAT:
6756 case TargetOpcode::G_SMIN:
6757 case TargetOpcode::G_SMAX:
6758 case TargetOpcode::G_UMIN:
6759 case TargetOpcode::G_UMAX:
6760 case TargetOpcode::G_FMINNUM:
6761 case TargetOpcode::G_FMAXNUM:
6762 case TargetOpcode::G_FMINNUM_IEEE:
6763 case TargetOpcode::G_FMAXNUM_IEEE:
6764 case TargetOpcode::G_FMINIMUM:
6765 case TargetOpcode::G_FMAXIMUM:
6766 case TargetOpcode::G_FMINIMUMNUM:
6767 case TargetOpcode::G_FMAXIMUMNUM:
6768 case TargetOpcode::G_STRICT_FADD:
6769 case TargetOpcode::G_STRICT_FSUB:
6770 case TargetOpcode::G_STRICT_FMUL: {
6778 case TargetOpcode::G_SHL:
6779 case TargetOpcode::G_ASHR:
6780 case TargetOpcode::G_LSHR: {
6786 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6792 case TargetOpcode::G_FMA:
6793 case TargetOpcode::G_STRICT_FMA:
6794 case TargetOpcode::G_FSHR:
6795 case TargetOpcode::G_FSHL: {
6804 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6805 case TargetOpcode::G_EXTRACT:
6812 case TargetOpcode::G_INSERT:
6813 case TargetOpcode::G_INSERT_VECTOR_ELT:
6814 case TargetOpcode::G_FREEZE:
6815 case TargetOpcode::G_FNEG:
6816 case TargetOpcode::G_FABS:
6817 case TargetOpcode::G_FSQRT:
6818 case TargetOpcode::G_FCEIL:
6819 case TargetOpcode::G_FFLOOR:
6820 case TargetOpcode::G_FNEARBYINT:
6821 case TargetOpcode::G_FRINT:
6822 case TargetOpcode::G_INTRINSIC_ROUND:
6823 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6824 case TargetOpcode::G_INTRINSIC_TRUNC:
6825 case TargetOpcode::G_BITREVERSE:
6826 case TargetOpcode::G_BSWAP:
6827 case TargetOpcode::G_FCANONICALIZE:
6828 case TargetOpcode::G_SEXT_INREG:
6829 case TargetOpcode::G_ABS:
6830 case TargetOpcode::G_CTLZ:
6831 case TargetOpcode::G_CTPOP:
6839 case TargetOpcode::G_SELECT: {
6840 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6842 if (!CondTy.isScalar() ||
6848 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6850 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6855 if (CondTy.isVector())
6865 case TargetOpcode::G_UNMERGE_VALUES:
6867 case TargetOpcode::G_PHI:
6869 case TargetOpcode::G_SHUFFLE_VECTOR:
6871 case TargetOpcode::G_BUILD_VECTOR: {
6873 for (
auto Op :
MI.uses()) {
6881 MIRBuilder.buildDeleteTrailingVectorElements(
6882 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6883 MI.eraseFromParent();
6886 case TargetOpcode::G_SEXT:
6887 case TargetOpcode::G_ZEXT:
6888 case TargetOpcode::G_ANYEXT:
6889 case TargetOpcode::G_TRUNC:
6890 case TargetOpcode::G_FPTRUNC:
6891 case TargetOpcode::G_FPEXT:
6892 case TargetOpcode::G_FPTOSI:
6893 case TargetOpcode::G_FPTOUI:
6894 case TargetOpcode::G_FPTOSI_SAT:
6895 case TargetOpcode::G_FPTOUI_SAT:
6896 case TargetOpcode::G_SITOFP:
6897 case TargetOpcode::G_UITOFP: {
6904 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6907 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6915 case TargetOpcode::G_ICMP:
6916 case TargetOpcode::G_FCMP: {
6924 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6929 case TargetOpcode::G_BITCAST: {
6933 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6934 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
6950 case TargetOpcode::G_VECREDUCE_FADD:
6951 case TargetOpcode::G_VECREDUCE_FMUL:
6952 case TargetOpcode::G_VECREDUCE_ADD:
6953 case TargetOpcode::G_VECREDUCE_MUL:
6954 case TargetOpcode::G_VECREDUCE_AND:
6955 case TargetOpcode::G_VECREDUCE_OR:
6956 case TargetOpcode::G_VECREDUCE_XOR:
6957 case TargetOpcode::G_VECREDUCE_SMAX:
6958 case TargetOpcode::G_VECREDUCE_SMIN:
6959 case TargetOpcode::G_VECREDUCE_UMAX:
6960 case TargetOpcode::G_VECREDUCE_UMIN: {
6961 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
6963 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6964 auto NeutralElement = getNeutralElementForVecReduce(
6970 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
6971 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6972 NeutralElement, Idx);
6976 MO.
setReg(NewVec.getReg(0));
6988 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6990 unsigned MaskNumElts = Mask.size();
6991 unsigned SrcNumElts = SrcTy.getNumElements();
6994 if (MaskNumElts == SrcNumElts)
6997 if (MaskNumElts < SrcNumElts) {
7005 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7006 MI.getOperand(1).getReg(),
7007 MI.getOperand(2).getReg(), NewMask);
7008 MI.eraseFromParent();
7013 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
7014 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
7023 MOps1[0] =
MI.getOperand(1).getReg();
7024 MOps2[0] =
MI.getOperand(2).getReg();
7026 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
7027 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
7031 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
7033 if (Idx >=
static_cast<int>(SrcNumElts))
7034 Idx += PaddedMaskNumElts - SrcNumElts;
7039 if (MaskNumElts != PaddedMaskNumElts) {
7041 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7044 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
7046 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
7051 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7054 MI.eraseFromParent();
7060 unsigned int TypeIdx,
LLT MoreTy) {
7061 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
7063 unsigned NumElts = DstTy.getNumElements();
7066 if (DstTy.isVector() && Src1Ty.isVector() &&
7067 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7075 if (DstTy != Src1Ty || DstTy != Src2Ty)
7083 for (
unsigned I = 0;
I != NumElts; ++
I) {
7085 if (Idx <
static_cast<int>(NumElts))
7088 NewMask[
I] = Idx - NumElts + WidenNumElts;
7092 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7093 MI.getOperand(1).getReg(),
7094 MI.getOperand(2).getReg(), NewMask);
7095 MI.eraseFromParent();
7104 unsigned SrcParts = Src1Regs.
size();
7105 unsigned DstParts = DstRegs.
size();
7107 unsigned DstIdx = 0;
7109 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7110 DstRegs[DstIdx] = FactorSum;
7115 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7117 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7118 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7120 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7124 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7125 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7127 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7137 if (DstIdx != DstParts - 1) {
7138 MachineInstrBuilder Uaddo =
7139 B.buildUAddo(NarrowTy,
LLT::integer(1), Factors[0], Factors[1]);
7140 FactorSum = Uaddo.
getReg(0);
7141 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7142 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7143 MachineInstrBuilder Uaddo =
7144 B.buildUAddo(NarrowTy,
LLT::integer(1), FactorSum, Factors[i]);
7145 FactorSum = Uaddo.
getReg(0);
7146 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7147 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7151 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7152 for (
unsigned i = 2; i < Factors.
size(); ++i)
7153 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7156 CarrySumPrevDstIdx = CarrySum;
7157 DstRegs[DstIdx] = FactorSum;
7169 LLT DstType = MRI.getType(DstReg);
7171 if (DstType.isVector())
7174 unsigned Opcode =
MI.getOpcode();
7175 unsigned OpO, OpE, OpF;
7177 case TargetOpcode::G_SADDO:
7178 case TargetOpcode::G_SADDE:
7179 case TargetOpcode::G_UADDO:
7180 case TargetOpcode::G_UADDE:
7181 case TargetOpcode::G_ADD:
7182 OpO = TargetOpcode::G_UADDO;
7183 OpE = TargetOpcode::G_UADDE;
7184 OpF = TargetOpcode::G_UADDE;
7185 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7186 OpF = TargetOpcode::G_SADDE;
7188 case TargetOpcode::G_SSUBO:
7189 case TargetOpcode::G_SSUBE:
7190 case TargetOpcode::G_USUBO:
7191 case TargetOpcode::G_USUBE:
7192 case TargetOpcode::G_SUB:
7193 OpO = TargetOpcode::G_USUBO;
7194 OpE = TargetOpcode::G_USUBE;
7195 OpF = TargetOpcode::G_USUBE;
7196 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7197 OpF = TargetOpcode::G_SSUBE;
7204 unsigned NumDefs =
MI.getNumExplicitDefs();
7205 Register Src1 =
MI.getOperand(NumDefs).getReg();
7206 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7209 CarryDst =
MI.getOperand(1).getReg();
7210 if (
MI.getNumOperands() == NumDefs + 3)
7211 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7213 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7214 LLT LeftoverTy, DummyTy;
7216 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7221 int NarrowParts = Src1Regs.
size();
7222 Src1Regs.
append(Src1Left);
7223 Src2Regs.
append(Src2Left);
7226 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7228 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7231 if (i == e - 1 && CarryDst)
7232 CarryOut = CarryDst;
7234 CarryOut = MRI.createGenericVirtualRegister(
LLT::integer(1));
7237 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7238 {Src1Regs[i], Src2Regs[i]});
7239 }
else if (i == e - 1) {
7240 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7241 {Src1Regs[i], Src2Regs[i], CarryIn});
7243 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7244 {Src1Regs[i], Src2Regs[i], CarryIn});
7250 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7251 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7252 ArrayRef(DstRegs).drop_front(NarrowParts));
7254 MI.eraseFromParent();
7260 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7262 LLT Ty = MRI.getType(DstReg);
7266 unsigned Size = Ty.getSizeInBits();
7268 if (
Size % NarrowSize != 0)
7271 unsigned NumParts =
Size / NarrowSize;
7272 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7273 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7279 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7283 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7284 MI.eraseFromParent();
7294 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7297 LLT SrcTy = MRI.getType(Src);
7308 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7321 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7324 if (SizeOp1 % NarrowSize != 0)
7326 int NumParts = SizeOp1 / NarrowSize;
7329 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7333 uint64_t OpStart =
MI.getOperand(2).getImm();
7334 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7335 for (
int i = 0; i < NumParts; ++i) {
7336 unsigned SrcStart = i * NarrowSize;
7338 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7341 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7349 int64_t ExtractOffset;
7351 if (OpStart < SrcStart) {
7353 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7355 ExtractOffset = OpStart - SrcStart;
7356 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7360 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7362 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7363 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7370 if (MRI.getType(DstReg).isVector())
7371 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7372 else if (DstRegs.
size() > 1)
7373 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7376 MI.eraseFromParent();
7388 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7390 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7393 SrcRegs.
append(LeftoverRegs);
7397 uint64_t OpStart =
MI.getOperand(3).getImm();
7398 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7399 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7400 unsigned DstStart =
I * NarrowSize;
7402 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7410 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7412 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7416 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7424 int64_t ExtractOffset, InsertOffset;
7426 if (OpStart < DstStart) {
7428 ExtractOffset = DstStart - OpStart;
7429 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7431 InsertOffset = OpStart - DstStart;
7434 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7438 if (ExtractOffset != 0 || SegSize != OpSize) {
7440 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7441 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7444 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7445 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7453 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7456 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7458 MI.eraseFromParent();
7466 LLT DstTy = MRI.getType(DstReg);
7468 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7474 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7475 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7479 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7480 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7483 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7484 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7485 {Src0Regs[I], Src1Regs[I]});
7489 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7492 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7493 DstLeftoverRegs.
push_back(Inst.getReg(0));
7496 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7497 LeftoverTy, DstLeftoverRegs);
7499 MI.eraseFromParent();
7509 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7511 LLT DstTy = MRI.getType(DstReg);
7516 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7517 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7518 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7520 MI.eraseFromParent();
7530 Register CondReg =
MI.getOperand(1).getReg();
7531 LLT CondTy = MRI.getType(CondReg);
7532 if (CondTy.isVector())
7536 LLT DstTy = MRI.getType(DstReg);
7542 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7543 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7547 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7548 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7551 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7553 CondReg, Src1Regs[
I], Src2Regs[
I]);
7557 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7559 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7563 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7564 LeftoverTy, DstLeftoverRegs);
7566 MI.eraseFromParent();
7576 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7579 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7580 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7583 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7585 auto C_0 =
B.buildConstant(NarrowTy, 0);
7587 UnmergeSrc.getReg(1), C_0);
7588 auto LoCTLZ = IsUndef ?
7589 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7590 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7591 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7592 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7593 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7594 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7596 MI.eraseFromParent();
7609 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7612 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7613 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7616 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7618 auto C_0 =
B.buildConstant(NarrowTy, 0);
7620 UnmergeSrc.getReg(0), C_0);
7621 auto HiCTTZ = IsUndef ?
7622 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7623 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7624 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7625 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7626 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7627 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7629 MI.eraseFromParent();
7642 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7645 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7650 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7654 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7655 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7663 auto LoInv =
B.buildXor(DstTy,
Lo, Sign);
7664 auto LoCTLZ =
B.buildCTLZ(DstTy, LoInv);
7667 auto C_NarrowSizeM1 =
B.buildConstant(DstTy, NarrowSize - 1);
7668 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7670 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7672 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7674 MI.eraseFromParent();
7684 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7687 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7688 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7690 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7691 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7692 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7694 MI.eraseFromParent();
7709 LLT ExpTy = MRI.getType(ExpReg);
7714 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7715 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7716 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7717 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7719 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7721 MI.getOperand(2).setReg(Trunc.getReg(0));
7728 unsigned Opc =
MI.getOpcode();
7731 auto QAction = LI.getAction(Q).Action;
7737 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7740 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7744 case TargetOpcode::G_CTLZ: {
7745 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7746 unsigned Len = SrcTy.getScalarSizeInBits();
7748 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7750 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7751 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7754 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7755 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7756 MI.eraseFromParent();
7772 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7773 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7776 Op = MIBOp.getReg(0);
7781 MI.eraseFromParent();
7784 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7787 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7791 case TargetOpcode::G_CTTZ: {
7792 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7794 unsigned Len = SrcTy.getScalarSizeInBits();
7795 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7798 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7799 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7802 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7803 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7804 MI.eraseFromParent();
7811 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7812 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7814 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7815 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7816 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7817 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7820 MI.eraseFromParent();
7824 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7825 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7829 case TargetOpcode::G_CTPOP: {
7831 LLT Ty = MRI.getType(SrcReg);
7832 unsigned Size = Ty.getScalarSizeInBits();
7844 auto C_1 =
B.buildConstant(Ty, 1);
7845 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7847 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7848 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7849 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7853 auto C_2 =
B.buildConstant(Ty, 2);
7854 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7856 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7857 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7858 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7859 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7866 auto C_4 =
B.buildConstant(Ty, 4);
7867 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7868 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7870 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7871 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7873 assert(
Size <= 128 &&
"Scalar size is too large for CTPOP lower algorithm");
7876 if (
Size == 16 && !Ty.isVector()) {
7878 auto C_8 =
B.buildConstant(Ty, 8);
7879 auto HighSum =
B.buildLShr(Ty, B8Count, C_8);
7880 auto Res =
B.buildAdd(Ty, B8Count, HighSum);
7881 B.buildAnd(
MI.getOperand(0).getReg(), Res,
B.buildConstant(Ty, 0xFF));
7882 MI.eraseFromParent();
7891 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7893 auto IsMulSupported = [
this](
const LLT Ty) {
7894 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7897 if (IsMulSupported(Ty)) {
7898 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7899 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7901 auto ResTmp = B8Count;
7902 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7903 auto ShiftC =
B.buildConstant(Ty, Shift);
7904 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7905 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7907 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7909 MI.eraseFromParent();
7912 case TargetOpcode::G_CTLS: {
7913 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7917 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7918 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7920 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7926 MI.eraseFromParent();
7947 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7948 LLT Ty = MRI.getType(Dst);
7949 LLT ShTy = MRI.getType(Z);
7956 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7957 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7962 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
7963 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7967 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7980 MI.eraseFromParent();
7986 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7987 LLT Ty = MRI.getType(Dst);
7988 LLT ShTy = MRI.getType(Z);
7991 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8001 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8002 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8003 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
8004 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
8005 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
8009 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
8012 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
8015 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
8017 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8018 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8019 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
8022 auto One =
MIRBuilder.buildConstant(ShTy, 1);
8024 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
8026 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
8029 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
8030 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
8035 MI.eraseFromParent();
8046 LLT Ty = MRI.getType(Dst);
8047 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
8049 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8050 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8053 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
8054 return lowerFunnelShiftAsShifts(
MI);
8058 if (Result == UnableToLegalize)
8059 return lowerFunnelShiftAsShifts(
MI);
8064 auto [Dst, Src] =
MI.getFirst2Regs();
8065 LLT DstTy = MRI.getType(Dst);
8066 LLT SrcTy = MRI.getType(Src);
8070 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8078 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8082 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8086 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8091 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8092 {UnmergeSrc.getReg(0)});
8093 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8094 {UnmergeSrc.getReg(1)});
8097 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8099 MI.eraseFromParent();
8116 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8120 LLT DstTy = MRI.getType(DstReg);
8121 LLT SrcTy = MRI.getType(SrcReg);
8129 SrcTy.getElementCount().divideCoefficientBy(2));
8142 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8154 MI.eraseFromParent();
8163 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8164 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8165 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8166 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8167 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8168 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8169 MI.eraseFromParent();
8174 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8176 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8177 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8182 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8183 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8185 return lowerRotateWithReverseRotate(
MI);
8188 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8189 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8190 bool IsFShLegal =
false;
8191 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8192 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8196 MI.eraseFromParent();
8201 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8204 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8209 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8210 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8211 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8217 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8218 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8220 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8226 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8227 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8229 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8231 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8236 MI.eraseFromParent();
8244 auto [Dst, Src] =
MI.getFirst2Regs();
8249 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8277 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8290 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8294 MI.eraseFromParent();
8302 auto [Dst, Src] =
MI.getFirst2Regs();
8307 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8320 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8322 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8327 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8329 MI.eraseFromParent();
8337 auto [Dst, Src] =
MI.getFirst2Regs();
8341 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S64);
8352 auto TwoP52 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4330000000000000));
8353 auto TwoP84 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4530000000000000));
8355 auto TwoP52P84FP =
MIRBuilder.buildFConstant(
S64, TwoP52P84);
8362 auto HighBitsFP =
MIRBuilder.buildOr(
S64, TwoP84, HighBits);
8363 auto Scratch =
MIRBuilder.buildFSub(
S64, HighBitsFP, TwoP52P84FP);
8364 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8366 MI.eraseFromParent();
8377 SrcTy.changeElementType(
LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
8378 auto M1 =
MI.getOpcode() == TargetOpcode::G_UITOFP
8384 MI.eraseFromParent();
8389 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8392 auto True =
MIRBuilder.buildFConstant(DstTy, 1.0);
8393 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8394 MIRBuilder.buildSelect(Dst, Src, True, False);
8395 MI.eraseFromParent();
8399 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8419 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8426 auto True =
MIRBuilder.buildFConstant(DstTy, -1.0);
8427 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8428 MIRBuilder.buildSelect(Dst, Src, True, False);
8429 MI.eraseFromParent();
8433 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8439 if (DstTy.getScalarSizeInBits() == 32) {
8446 auto SignBit =
MIRBuilder.buildConstant(I64, 63);
8447 auto S =
MIRBuilder.buildAShr(I64, L, SignBit);
8449 auto LPlusS =
MIRBuilder.buildAdd(I64, L, S);
8456 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8457 MI.eraseFromParent();
8465 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8469 if (SrcTy !=
S64 && SrcTy !=
S32)
8471 if (DstTy !=
S32 && DstTy !=
S64)
8498 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8500 MI.eraseFromParent();
8505 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8510 if (SrcTy.getScalarType() !=
S32 || DstTy.getScalarType() !=
S64)
8517 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8519 auto ExponentMask =
MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8520 auto ExponentLoBit =
MIRBuilder.buildConstant(SrcTy, 23);
8522 auto AndExpMask =
MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8523 auto ExponentBits =
MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8525 auto SignMask =
MIRBuilder.buildConstant(SrcTy,
8527 auto AndSignMask =
MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8528 auto SignLowBit =
MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8529 auto Sign =
MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8532 auto MantissaMask =
MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8533 auto AndMantissaMask =
MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8534 auto K =
MIRBuilder.buildConstant(SrcTy, 0x00800000);
8536 auto R =
MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8539 auto Bias =
MIRBuilder.buildConstant(SrcTy, 127);
8544 auto Shl =
MIRBuilder.buildShl(DstTy, R, SubExponent);
8545 auto Srl =
MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8551 R =
MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8553 auto XorSign =
MIRBuilder.buildXor(DstTy, R, Sign);
8554 auto Ret =
MIRBuilder.buildSub(DstTy, XorSign, Sign);
8556 auto ZeroSrcTy =
MIRBuilder.buildConstant(SrcTy, 0);
8561 auto ZeroDstTy =
MIRBuilder.buildConstant(DstTy, 0);
8562 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8564 MI.eraseFromParent();
8570 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8572 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8573 unsigned SatWidth = DstTy.getScalarSizeInBits();
8577 APInt MinInt, MaxInt;
8600 if (AreExactFloatBounds) {
8602 auto MaxC =
MIRBuilder.buildFConstant(SrcTy, MinFloat);
8605 auto Max =
MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8607 auto MinC =
MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8616 MI.eraseFromParent();
8621 auto FpToInt =
MIRBuilder.buildFPTOSI(DstTy, Min);
8626 MI.eraseFromParent();
8633 auto FpToInt = IsSigned ?
MIRBuilder.buildFPTOSI(DstTy, Src)
8641 DstTy, ULT,
MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8651 MI.eraseFromParent();
8657 DstTy, OGT,
MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8661 MI.eraseFromParent();
8668 assert((
MI.getOpcode() == TargetOpcode::G_FPEXT ||
8669 MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
8670 "Only G_FPEXT and G_FPTRUNC are expected");
8672 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8677 if (
MI.getOpcode() == TargetOpcode::G_FPEXT) {
8679 StoreOpc = TargetOpcode::G_STORE;
8680 LoadOpc = TargetOpcode::G_FPEXTLOAD;
8683 StoreOpc = TargetOpcode::G_FPTRUNCSTORE;
8684 LoadOpc = TargetOpcode::G_LOAD;
8693 StackTy, StackTyAlign);
8694 MIRBuilder.buildStoreInstr(StoreOpc, SrcReg, StackTemp, *StoreMMO);
8697 StackTy, StackTyAlign);
8698 MIRBuilder.buildLoadInstr(LoadOpc, DstReg, StackTemp, *LoadMMO);
8700 MI.eraseFromParent();
8710 auto [Dst, Src] =
MI.getFirst2Regs();
8714 if (MRI.getType(Src).isVector())
8718 unsigned Flags =
MI.getFlags();
8721 MI.eraseFromParent();
8725 const unsigned ExpMask = 0x7ff;
8726 const unsigned ExpBiasf64 = 1023;
8727 const unsigned ExpBiasf16 = 15;
8756 auto SelectCC =
MIRBuilder.buildSelect(
S32, CmpM_NE0, Bits0x200, Zero);
8816 MI.eraseFromParent();
8823 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8833 auto SrcI =
MIRBuilder.buildBitcast(I32Ty, SrcReg);
8855 auto Trunc =
MIRBuilder.buildTrunc(I16Ty, Srl);
8857 MI.eraseFromParent();
8863 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8864 if (DstTy.getScalarType().isFloat16() && SrcTy.getScalarType().isFloat64())
8867 if (DstTy.getScalarType().isBFloat16() && SrcTy.getScalarType().isFloat32())
8874 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8875 LLT Ty = MRI.getType(Dst);
8877 auto CvtSrc1 =
MIRBuilder.buildSITOFP(Ty, Src1);
8878 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1,
MI.getFlags());
8879 MI.eraseFromParent();
8884 auto [DstFrac, DstInt, Src] =
MI.getFirst3Regs();
8885 LLT Ty = MRI.getType(Src);
8886 auto Flags =
MI.getFlags();
8894 FracToUse = FracPart.getReg(0);
8896 auto Abs =
MIRBuilder.buildFAbs(Ty, Src, Flags);
8900 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
8902 FracToUse =
Select.getReg(0);
8905 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8908 MI.eraseFromParent();
8914 case TargetOpcode::G_SMIN:
8916 case TargetOpcode::G_SMAX:
8918 case TargetOpcode::G_UMIN:
8920 case TargetOpcode::G_UMAX:
8928 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8933 auto Cmp =
MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8934 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8936 MI.eraseFromParent();
8945 LLT DstTy = MRI.getType(Dst);
8946 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8956 auto Zero =
MIRBuilder.buildConstant(DstTy, 0);
8957 auto IsGT =
MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8959 auto IsLT =
MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8962 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
8963 auto BC = TLI.getBooleanContents(DstTy.
isVector(),
false);
8964 if (TLI.preferSelectsOverBooleanArithmetic(
8967 auto One =
MIRBuilder.buildConstant(DstTy, 1);
8968 auto SelectZeroOrOne =
MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8970 auto MinusOne =
MIRBuilder.buildConstant(DstTy, -1);
8971 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8977 unsigned BoolExtOp =
8979 IsGT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8980 IsLT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8984 MI.eraseFromParent();
8990 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
8991 const int Src0Size = Src0Ty.getScalarSizeInBits();
8992 const int Src1Size = Src1Ty.getScalarSizeInBits();
9002 if (!(Src0Ty.getScalarType().isAnyScalar() ||
9003 Src0Ty.getScalarType().isInteger()))
9004 Src0Int =
MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);
9006 if (!(Src1Ty.getScalarType().isAnyScalar() ||
9007 Src1Ty.getScalarType().isInteger()))
9008 Src1Int =
MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);
9013 auto NotSignBitMask =
MIRBuilder.buildConstant(
9017 MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
9019 if (Src0Ty == Src1Ty) {
9020 And1 =
MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
9021 }
else if (Src0Size > Src1Size) {
9022 auto ShiftAmt =
MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
9023 auto Zext =
MIRBuilder.buildZExt(Src0IntTy, Src1Int);
9024 auto Shift =
MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
9025 And1 =
MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
9027 auto ShiftAmt =
MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
9028 auto Shift =
MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
9029 auto Trunc =
MIRBuilder.buildTrunc(Src0IntTy, Shift);
9030 And1 =
MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
9036 unsigned Flags =
MI.getFlags();
9041 if (DstTy == DstIntTy)
9042 MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
9048 MI.eraseFromParent();
9059 switch (
MI.getOpcode()) {
9060 case TargetOpcode::G_FMINNUM:
9061 NewOp = TargetOpcode::G_FMINNUM_IEEE;
9063 case TargetOpcode::G_FMINIMUMNUM:
9064 NewOp = TargetOpcode::G_FMINNUM;
9066 case TargetOpcode::G_FMAXNUM:
9067 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
9069 case TargetOpcode::G_FMAXIMUMNUM:
9070 NewOp = TargetOpcode::G_FMAXNUM;
9076 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9077 LLT Ty = MRI.getType(Dst);
9086 if (!VT->isKnownNeverSNaN(Src0))
9087 Src0 =
MIRBuilder.buildFCanonicalize(Ty, Src0,
MI.getFlags()).getReg(0);
9089 if (!VT->isKnownNeverSNaN(Src1))
9090 Src1 =
MIRBuilder.buildFCanonicalize(Ty, Src1,
MI.getFlags()).getReg(0);
9095 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1},
MI.getFlags());
9096 MI.eraseFromParent();
9102 unsigned Opc =
MI.getOpcode();
9103 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9104 LLT Ty = MRI.getType(Dst);
9107 bool IsMax = (
Opc == TargetOpcode::G_FMAXIMUM);
9109 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
9110 unsigned OpcNonIeee =
9111 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
9112 bool MinMaxMustRespectOrderedZero =
false;
9116 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
9118 MinMaxMustRespectOrderedZero =
true;
9119 }
else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
9124 Res =
MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
9129 (!VT->isKnownNeverNaN(Src0) || !VT->isKnownNeverNaN(Src1))) {
9132 LLT ElementTy = Ty.
isScalar() ? Ty : Ty.getElementType();
9136 NaN =
MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
9138 Res =
MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
9148 const unsigned Flags =
MI.getFlags();
9154 auto LHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
9156 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
9158 auto RHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
9160 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9162 Res =
MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9167 MI.eraseFromParent();
9174 LLT Ty = MRI.getType(DstReg);
9175 unsigned Flags =
MI.getFlags();
9180 MI.eraseFromParent();
9186 auto [DstReg,
X] =
MI.getFirst2Regs();
9187 const unsigned Flags =
MI.getFlags();
9188 const LLT Ty = MRI.getType(DstReg);
9200 auto AbsDiff =
MIRBuilder.buildFAbs(Ty, Diff, Flags);
9202 auto Half =
MIRBuilder.buildFConstant(Ty, 0.5);
9207 auto One =
MIRBuilder.buildFConstant(Ty, 1.0);
9208 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9209 auto BoolFP =
MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9210 auto SignedOffset =
MIRBuilder.buildFCopysign(Ty, BoolFP,
X);
9212 MIRBuilder.buildFAdd(DstReg,
T, SignedOffset, Flags);
9214 MI.eraseFromParent();
9219 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
9220 unsigned Flags =
MI.getFlags();
9221 LLT Ty = MRI.getType(DstReg);
9228 auto Trunc =
MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9229 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9232 SrcReg, Zero, Flags);
9234 SrcReg, Trunc, Flags);
9238 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9239 MI.eraseFromParent();
9245 const unsigned NumOps =
MI.getNumOperands();
9246 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
9247 unsigned PartSize = Src0Ty.getSizeInBits();
9252 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
9253 const unsigned Offset = (
I - 1) * PartSize;
9256 auto ZextInput =
MIRBuilder.buildZExt(WideTy, SrcReg);
9259 MRI.createGenericVirtualRegister(WideTy);
9262 auto Shl =
MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9263 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9264 ResultReg = NextResult;
9267 if (DstTy.isPointer()) {
9268 if (
MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9269 DstTy.getAddressSpace())) {
9277 MI.eraseFromParent();
9283 const unsigned NumDst =
MI.getNumOperands() - 1;
9284 Register SrcReg =
MI.getOperand(NumDst).getReg();
9285 Register Dst0Reg =
MI.getOperand(0).getReg();
9286 LLT DstTy = MRI.getType(Dst0Reg);
9295 LLT IntTy = MRI.getType(SrcReg);
9300 unsigned Offset = DstSize;
9301 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
9303 auto Shift =
MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9307 MI.eraseFromParent();
9326 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9327 InsertVal =
MI.getOperand(2).getReg();
9329 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
9331 LLT VecTy = MRI.getType(SrcVec);
9341 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
9342 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9344 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9347 MI.eraseFromParent();
9352 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
9363 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9370 int64_t
Offset = IdxVal * EltBytes;
9381 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9384 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9386 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9389 MI.eraseFromParent();
9395 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9396 MI.getFirst3RegLLTs();
9406 for (
int Idx : Mask) {
9408 if (!
Undef.isValid())
9414 assert(!Src0Ty.isScalar() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9416 int NumElts = Src0Ty.getNumElements();
9417 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9418 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9419 auto [It, Inserted] = CachedExtract.
try_emplace(Idx);
9421 auto IdxK =
MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9423 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9428 assert(DstTy.isVector() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9429 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9430 MI.eraseFromParent();
9436 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9437 MI.getFirst4RegLLTs();
9439 if (VecTy.isScalableVector())
9455 auto OutPos =
MIRBuilder.buildConstant(IdxTy, 0);
9458 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9461 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9464 std::optional<APInt> PassthruSplatVal =
9467 if (PassthruSplatVal.has_value()) {
9469 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9470 }
else if (HasPassthru) {
9471 auto Popcount =
MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9472 Popcount =
MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9478 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9482 unsigned NumElmts = VecTy.getNumElements();
9483 for (
unsigned I = 0;
I < NumElmts; ++
I) {
9485 auto Val =
MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9488 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9491 auto MaskI =
MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9496 OutPos =
MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9498 if (HasPassthru &&
I == NumElmts - 1) {
9501 auto AllLanesSelected =
MIRBuilder.buildICmp(
9503 OutPos =
MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9504 {OutPos, EndOfVector});
9508 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9510 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9515 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9517 MI.eraseFromParent();
9528 SPTmp =
MIRBuilder.buildCast(IntPtrTy, SPTmp);
9534 if (Alignment >
Align(1)) {
9537 auto AlignCst =
MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9546 const auto &MF = *
MI.getMF();
9552 Register AllocSize =
MI.getOperand(1).getReg();
9555 LLT PtrTy = MRI.getType(Dst);
9556 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9563 MI.eraseFromParent();
9569 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9574 MI.eraseFromParent();
9580 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9585 MI.eraseFromParent();
9591 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9592 unsigned Offset =
MI.getOperand(2).getImm();
9595 if (SrcTy.isVector()) {
9596 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9597 unsigned DstSize = DstTy.getSizeInBits();
9599 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9600 (
Offset + DstSize <= SrcTy.getSizeInBits())) {
9602 auto Unmerge =
MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9606 for (
unsigned Idx =
Offset / SrcEltSize;
9607 Idx < (
Offset + DstSize) / SrcEltSize; ++Idx) {
9608 SubVectorElts.
push_back(Unmerge.getReg(Idx));
9610 if (SubVectorElts.
size() == 1)
9611 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9613 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9615 MI.eraseFromParent();
9621 if ((SrcTy.isPointer() &&
9622 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9623 (DstTy.isPointer() &&
9624 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9625 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9629 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9630 (SrcTy.isScalar() || SrcTy.isPointer() ||
9631 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9632 LLT SrcIntTy = SrcTy;
9633 if (!SrcTy.isScalar()) {
9635 SrcReg =
MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9639 if (DstTy.isPointer())
9641 MRI.createGenericVirtualRegister(
LLT::scalar(DstTy.getSizeInBits()));
9647 auto Shr =
MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9651 if (DstTy.isPointer())
9654 MI.eraseFromParent();
9662 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
9665 LLT DstTy = MRI.getType(Src);
9666 LLT InsertTy = MRI.getType(InsertSrc);
9669 bool IsNonIntegralInsert =
9679 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9680 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9687 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9689 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, Src);
9693 for (; Idx <
Offset / EltSize; ++Idx) {
9694 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9699 auto UnmergeInsertSrc =
MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9700 for (
unsigned i = 0; Idx < (
Offset + InsertSize) / EltSize;
9702 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
9706 InsertSrc =
MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9708 InsertSrc =
MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9715 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9718 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9719 MI.eraseFromParent();
9728 if (IsNonIntegralDst || IsNonIntegralInsert) {
9729 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9733 LLT IntDstTy = DstTy;
9737 Src =
MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9742 InsertSrc =
MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9748 ExtInsSrc =
MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9754 auto Mask =
MIRBuilder.buildConstant(IntDstTy, MaskVal);
9755 auto MaskedSrc =
MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9756 auto Or =
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9759 MI.eraseFromParent();
9765 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9766 MI.getFirst4RegLLTs();
9767 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
9770 LLT BoolTy = Dst1Ty;
9772 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9787 auto ResultLowerThanLHS =
9791 MIRBuilder.buildXor(Dst1, RHSNegative, ResultLowerThanLHS);
9795 auto LHSLessThanRHS =
9797 auto ResultNegative =
9799 MIRBuilder.buildXor(Dst1, LHSLessThanRHS, ResultNegative);
9803 MI.eraseFromParent();
9809 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9810 const LLT Ty = MRI.getType(Res);
9813 auto Tmp =
MIRBuilder.buildAdd(Ty, LHS, RHS);
9814 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9815 auto Sum =
MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9826 MI.eraseFromParent();
9831 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9832 const LLT Ty = MRI.getType(Res);
9835 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9836 auto RHSPlusCI =
MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9837 auto Diff =
MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9842 auto X2 =
MIRBuilder.buildXor(Ty, LHS, Diff);
9847 MI.eraseFromParent();
9853 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9854 LLT Ty = MRI.getType(Res);
9858 switch (
MI.getOpcode()) {
9861 case TargetOpcode::G_UADDSAT:
9864 BaseOp = TargetOpcode::G_ADD;
9866 case TargetOpcode::G_SADDSAT:
9869 BaseOp = TargetOpcode::G_ADD;
9871 case TargetOpcode::G_USUBSAT:
9874 BaseOp = TargetOpcode::G_SUB;
9876 case TargetOpcode::G_SSUBSAT:
9879 BaseOp = TargetOpcode::G_SUB;
9894 uint64_t NumBits = Ty.getScalarSizeInBits();
9905 auto NegOne =
MIRBuilder.buildConstant(Ty, -1);
9913 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9918 auto Min =
MIRBuilder.buildUMin(Ty, Not, RHS);
9919 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9922 MI.eraseFromParent();
9928 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9929 LLT Ty = MRI.getType(Res);
9933 unsigned OverflowOp;
9934 switch (
MI.getOpcode()) {
9937 case TargetOpcode::G_UADDSAT:
9940 OverflowOp = TargetOpcode::G_UADDO;
9942 case TargetOpcode::G_SADDSAT:
9945 OverflowOp = TargetOpcode::G_SADDO;
9947 case TargetOpcode::G_USUBSAT:
9950 OverflowOp = TargetOpcode::G_USUBO;
9952 case TargetOpcode::G_SSUBSAT:
9955 OverflowOp = TargetOpcode::G_SSUBO;
9960 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9961 Register Tmp = OverflowRes.getReg(0);
9962 Register Ov = OverflowRes.getReg(1);
9971 uint64_t NumBits = Ty.getScalarSizeInBits();
9972 auto ShiftAmount =
MIRBuilder.buildConstant(Ty, NumBits - 1);
9973 auto Sign =
MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9976 Clamp =
MIRBuilder.buildAdd(Ty, Sign, MinVal);
9984 Clamp =
MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9988 MI.eraseFromParent();
9994 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9995 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9996 "Expected shlsat opcode!");
9997 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9998 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9999 LLT Ty = MRI.getType(Res);
10003 auto Result =
MIRBuilder.buildShl(Ty, LHS, RHS);
10004 auto Orig = IsSigned ?
MIRBuilder.buildAShr(Ty, Result, RHS)
10013 SatVal =
MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
10018 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
10020 MI.eraseFromParent();
10025 auto [Dst, Src] =
MI.getFirst2Regs();
10026 const LLT Ty = MRI.getType(Src);
10027 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
10028 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
10031 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt);
10032 auto LSByteShiftedLeft =
MIRBuilder.buildShl(Ty, Src, ShiftAmt);
10033 auto MSByteShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10034 auto Res =
MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
10037 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
10039 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
10040 auto Mask =
MIRBuilder.buildConstant(Ty, APMask);
10041 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
10043 auto LoByte =
MIRBuilder.buildAnd(Ty, Src, Mask);
10044 auto LoShiftedLeft =
MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
10045 Res =
MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
10047 auto SrcShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10048 auto HiShiftedRight =
MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
10049 Res =
MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
10051 Res.getInstr()->getOperand(0).setReg(Dst);
10053 MI.eraseFromParent();
10060 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
10063 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
10064 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
10065 return B.buildOr(Dst,
LHS,
RHS);
10070 auto [Dst, Src] =
MI.getFirst2Regs();
10071 const LLT SrcTy = MRI.getType(Src);
10072 unsigned Size = SrcTy.getScalarSizeInBits();
10073 unsigned VSize = SrcTy.getSizeInBits();
10076 if (SrcTy.isVector() && (VSize % 8 == 0) &&
10077 (LI.isLegal({TargetOpcode::G_BITREVERSE,
10078 {LLT::fixed_vector(VSize / 8, LLT::integer(8)),
10079 LLT::fixed_vector(VSize / 8, LLT::integer(8))}}))) {
10084 auto BSWAP =
MIRBuilder.buildBSwap(SrcTy, Src);
10085 auto Cast =
MIRBuilder.buildBitcast(VTy, BSWAP);
10086 auto RBIT =
MIRBuilder.buildBitReverse(VTy, Cast);
10090 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
10113 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
10117 Tmp2 = MIRBuilder.
buildShl(SrcTy, Src, ShAmt);
10120 Tmp2 = MIRBuilder.
buildLShr(SrcTy, Src, ShAmt);
10124 Tmp2 = MIRBuilder.
buildAnd(SrcTy, Tmp2, Mask);
10128 Tmp = MIRBuilder.
buildOr(SrcTy, Tmp, Tmp2);
10133 MI.eraseFromParent();
10141 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
10142 int NameOpIdx = IsRead ? 1 : 0;
10143 int ValRegIndex = IsRead ? 0 : 1;
10145 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
10146 const LLT Ty = MRI.getType(ValReg);
10148 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
10155 (IsRead ?
"llvm.read_register" :
"llvm.write_register"),
10156 Fn,
MI.getDebugLoc()));
10160 MI.eraseFromParent();
10169 MI.eraseFromParent();
10175 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
10176 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10177 Register Result =
MI.getOperand(0).getReg();
10178 LLT OrigTy = MRI.getType(Result);
10182 auto LHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(1)});
10183 auto RHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(2)});
10185 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10187 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, SizeInBits);
10188 auto Shifted =
MIRBuilder.buildInstr(ShiftOp, {WideTy}, {
Mul, ShiftAmt});
10191 MI.eraseFromParent();
10197 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10202 MI.eraseFromParent();
10207 MI.eraseFromParent();
10214 unsigned BitSize = SrcTy.getScalarSizeInBits();
10218 auto AsInt = SrcTy == IntTy ?
MIRBuilder.buildCopy(IntTy, SrcReg)
10225 APInt ExpMask = Inf;
10227 APInt QNaNBitMask =
10231 auto SignBitC =
MIRBuilder.buildConstant(IntTy, SignBit);
10232 auto ValueMaskC =
MIRBuilder.buildConstant(IntTy, ValueMask);
10233 auto InfC =
MIRBuilder.buildConstant(IntTy, Inf);
10234 auto ExpMaskC =
MIRBuilder.buildConstant(IntTy, ExpMask);
10235 auto ZeroC =
MIRBuilder.buildConstant(IntTy, 0);
10237 auto Abs =
MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10241 auto Res =
MIRBuilder.buildConstant(DstTy, 0);
10243 LLT DstTyCopy = DstTy;
10245 Res =
MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10273 auto ExpBits =
MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10276 Mask &= ~PartialCheck;
10285 else if (PartialCheck ==
fcZero)
10297 auto OneC =
MIRBuilder.buildConstant(IntTy, 1);
10298 auto VMinusOne =
MIRBuilder.buildSub(IntTy, V, OneC);
10299 auto SubnormalRes =
10301 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10303 SubnormalRes =
MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10304 appendToRes(SubnormalRes);
10311 else if (PartialCheck ==
fcInf)
10316 auto NegInfC =
MIRBuilder.buildConstant(IntTy, NegInf);
10323 auto InfWithQnanBitC =
MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10324 if (PartialCheck ==
fcNan) {
10328 }
else if (PartialCheck ==
fcQNan) {
10338 Abs, InfWithQnanBitC);
10339 appendToRes(
MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10346 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
10348 IntTy, Abs,
MIRBuilder.buildConstant(IntTy, ExpLSB));
10349 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10352 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10354 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10357 DstTy, Sign,
MIRBuilder.buildConstant(DstTy, InversionMask));
10358 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10360 appendToRes(NormalRes);
10364 MI.eraseFromParent();
10370 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10371 MI.getFirst4RegLLTs();
10380 Op1Reg =
MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10381 Op1Ty = MRI.getType(Op1Reg);
10382 Op2Reg =
MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10383 Op2Ty = MRI.getType(Op2Reg);
10387 if (MaskTy.isScalar()) {
10395 MaskElt =
MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10398 MaskTy = DstTy.changeElementType(
LLT::integer(DstTy.getScalarSizeInBits()));
10400 MIRBuilder.buildSExtOrTrunc(MaskTy.getScalarType(), MaskElt).getReg(0);
10402 if (DstTy.isVector()) {
10404 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MaskTy, MaskElt);
10405 MaskReg = ShufSplat.getReg(0);
10409 }
else if (!DstTy.isVector()) {
10414 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10418 if (!Op1Ty.getScalarType().isAnyScalar() &&
10419 !Op1Ty.getScalarType().isInteger())
10420 Op1Reg =
MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);
10422 if (!Op2Ty.getScalarType().isAnyScalar() &&
10423 !Op2Ty.getScalarType().isInteger()) {
10425 Op2Ty.changeElementType(
LLT::integer(Op2Ty.getScalarSizeInBits()));
10426 Op2Reg =
MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
10429 auto NotMask =
MIRBuilder.buildNot(MaskTy, MaskReg);
10430 auto NewOp1 =
MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10431 auto NewOp2 =
MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10436 if (DstTy == Op1TyInt)
10439 auto Or =
MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
10443 MI.eraseFromParent();
10449 unsigned Opcode =
MI.getOpcode();
10452 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10453 : TargetOpcode::G_UDIV,
10454 {
MI.getOperand(0).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10456 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10457 : TargetOpcode::G_UREM,
10458 {
MI.getOperand(1).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10459 MI.eraseFromParent();
10469 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
10473 auto Shift =
MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10476 MI.eraseFromParent();
10486 Register SrcReg =
MI.getOperand(1).getReg();
10487 LLT Ty = MRI.getType(SrcReg);
10488 auto Zero =
MIRBuilder.buildConstant(Ty, 0);
10491 MI.eraseFromParent();
10497 Register SrcReg =
MI.getOperand(1).getReg();
10498 Register DestReg =
MI.getOperand(0).getReg();
10500 auto Zero =
MIRBuilder.buildConstant(Ty, 0).getReg(0);
10501 auto Sub =
MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10504 MI.eraseFromParent();
10510 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10511 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10512 "Expected G_ABDS or G_ABDU instruction");
10514 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10515 LLT Ty = MRI.getType(LHS);
10525 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10527 MI.eraseFromParent();
10533 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10534 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10535 "Expected G_ABDS or G_ABDU instruction");
10537 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10538 LLT Ty = MRI.getType(LHS);
10543 if (
MI.getOpcode() == TargetOpcode::G_ABDS) {
10544 MaxReg =
MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10545 MinReg =
MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10547 MaxReg =
MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10548 MinReg =
MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10550 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10552 MI.eraseFromParent();
10557 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10562 if (!(SrcTy.getScalarType().isAnyScalar() ||
10563 SrcTy.getScalarType().isInteger())) {
10565 SrcTy.changeElementType(
LLT::integer(SrcTy.getScalarSizeInBits()));
10566 CastedSrc =
MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
10569 if (MRI.getType(DstReg) != TyInt) {
10573 .buildAnd(TyInt, CastedSrc,
10576 DstTy.getScalarSizeInBits())))
10588 MI.eraseFromParent();
10594 Register SrcReg =
MI.getOperand(1).getReg();
10595 LLT SrcTy = MRI.getType(SrcReg);
10596 LLT DstTy = MRI.getType(SrcReg);
10599 if (SrcTy.isScalar()) {
10604 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::COPY));
10615 Register ListPtr =
MI.getOperand(1).getReg();
10616 LLT PtrTy = MRI.getType(ListPtr);
10623 auto VAList =
MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10625 const Align A(
MI.getOperand(2).getImm());
10627 if (
A > TLI.getMinStackArgumentAlignment()) {
10629 MIRBuilder.buildConstant(PtrTyAsScalarTy,
A.value() - 1).getReg(0);
10630 auto AddDst =
MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10631 auto AndDst =
MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst,
Log2(
A));
10632 VAList = AndDst.getReg(0);
10639 LLT LLTTy = MRI.getType(Dst);
10642 MIRBuilder.buildConstant(PtrTyAsScalarTy,
DL.getTypeAllocSize(Ty));
10643 auto Succ =
MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10648 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10650 Align EltAlignment =
DL.getABITypeAlign(Ty);
10653 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10655 MI.eraseFromParent();
10670 unsigned Limit,
const MemOp &
Op,
10671 unsigned DstAS,
unsigned SrcAS,
10672 const AttributeList &FuncAttributes,
10674 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
10684 if (
Op.isFixedDstAlign())
10685 while (
Op.getDstAlign() < Ty.getSizeInBytes() &&
10688 assert(Ty.getSizeInBits() > 0 &&
"Could not find valid type");
10692 unsigned NumMemOps = 0;
10695 unsigned TySize = Ty.getSizeInBytes();
10696 while (TySize >
Size) {
10706 assert(NewTySize > 0 &&
"Could not find appropriate type");
10713 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
10715 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
10721 TySize = NewTySize;
10725 if (++NumMemOps > Limit)
10728 MemOps.push_back(Ty);
10738 unsigned NumBits = Ty.getScalarSizeInBits();
10740 if (!Ty.isVector() && ValVRegAndVal) {
10741 APInt Scalar = ValVRegAndVal->Value.
trunc(8);
10749 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10770 uint64_t KnownLen,
Align Alignment,
10772 auto &MF = *
MI.getParent()->getParent();
10777 assert(KnownLen != 0 &&
"Have a zero length memset length!");
10779 bool DstAlignCanChange =
false;
10783 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10785 DstAlignCanChange =
true;
10787 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10788 std::vector<LLT> MemOps;
10790 const auto &DstMMO = **
MI.memoperands_begin();
10791 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10794 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10805 if (DstAlignCanChange) {
10808 Align NewAlign =
DL.getABITypeAlign(IRTy);
10809 if (NewAlign > Alignment) {
10810 Alignment = NewAlign;
10818 MachineIRBuilder MIB(
MI);
10820 LLT LargestTy = MemOps[0];
10821 for (
unsigned i = 1; i < MemOps.size(); i++)
10823 LargestTy = MemOps[i];
10835 LLT PtrTy = MRI.getType(Dst);
10836 unsigned DstOff = 0;
10837 unsigned Size = KnownLen;
10838 for (
unsigned I = 0;
I < MemOps.size();
I++) {
10839 LLT Ty = MemOps[
I];
10841 if (TySize >
Size) {
10844 assert(
I == MemOps.size() - 1 &&
I != 0);
10845 DstOff -= TySize -
Size;
10855 TLI.isTruncateFree(LargestVT, VT))
10856 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10869 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst,
Offset).getReg(0);
10872 MIB.buildStore(
Value, Ptr, *StoreMMO);
10877 MI.eraseFromParent();
10883 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10885 auto [Dst, Src, Len] =
MI.getFirst3Regs();
10887 const auto *MMOIt =
MI.memoperands_begin();
10889 bool IsVolatile =
MemOp->isVolatile();
10895 "inline memcpy with dynamic size is not yet supported");
10896 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10897 if (KnownLen == 0) {
10898 MI.eraseFromParent();
10902 const auto &DstMMO = **
MI.memoperands_begin();
10903 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10904 Align DstAlign = DstMMO.getBaseAlign();
10905 Align SrcAlign = SrcMMO.getBaseAlign();
10907 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10914 Align SrcAlign,
bool IsVolatile) {
10915 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10916 return lowerMemcpy(
MI, Dst, Src, KnownLen,
10917 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10924 Align SrcAlign,
bool IsVolatile) {
10925 auto &MF = *
MI.getParent()->getParent();
10930 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
10932 bool DstAlignCanChange =
false;
10934 Align Alignment = std::min(DstAlign, SrcAlign);
10938 DstAlignCanChange =
true;
10944 std::vector<LLT> MemOps;
10946 const auto &DstMMO = **
MI.memoperands_begin();
10947 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10953 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10959 if (DstAlignCanChange) {
10962 Align NewAlign =
DL.getABITypeAlign(IRTy);
10967 if (!
TRI->hasStackRealignment(MF))
10969 NewAlign = std::min(NewAlign, *StackAlign);
10971 if (NewAlign > Alignment) {
10972 Alignment = NewAlign;
10980 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
10982 MachineIRBuilder MIB(
MI);
10988 unsigned CurrOffset = 0;
10989 unsigned Size = KnownLen;
10990 for (
auto CopyTy : MemOps) {
10993 if (CopyTy.getSizeInBytes() >
Size)
10994 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
11005 if (CurrOffset != 0) {
11006 LLT SrcTy = MRI.getType(Src);
11010 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11012 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
11016 if (CurrOffset != 0) {
11017 LLT DstTy = MRI.getType(Dst);
11018 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11020 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
11021 CurrOffset += CopyTy.getSizeInBytes();
11022 Size -= CopyTy.getSizeInBytes();
11025 MI.eraseFromParent();
11031 uint64_t KnownLen,
Align DstAlign,
Align SrcAlign,
11033 auto &MF = *
MI.getParent()->getParent();
11038 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
11040 bool DstAlignCanChange =
false;
11043 Align Alignment = std::min(DstAlign, SrcAlign);
11045 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
11047 DstAlignCanChange =
true;
11049 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
11050 std::vector<LLT> MemOps;
11052 const auto &DstMMO = **
MI.memoperands_begin();
11053 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
11054 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
11055 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
11062 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
11068 if (DstAlignCanChange) {
11071 Align NewAlign =
DL.getABITypeAlign(IRTy);
11076 if (!
TRI->hasStackRealignment(MF))
11077 if (MaybeAlign StackAlign =
DL.getStackAlignment())
11078 NewAlign = std::min(NewAlign, *StackAlign);
11080 if (NewAlign > Alignment) {
11081 Alignment = NewAlign;
11089 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
11091 MachineIRBuilder MIB(
MI);
11095 unsigned CurrOffset = 0;
11096 SmallVector<Register, 16> LoadVals;
11097 for (
auto CopyTy : MemOps) {
11104 if (CurrOffset != 0) {
11105 LLT SrcTy = MRI.getType(Src);
11108 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11110 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
11111 CurrOffset += CopyTy.getSizeInBytes();
11115 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
11116 LLT CopyTy = MemOps[
I];
11122 if (CurrOffset != 0) {
11123 LLT DstTy = MRI.getType(Dst);
11126 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11128 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
11131 MI.eraseFromParent();
11137 const unsigned Opc =
MI.getOpcode();
11140 assert((
Opc == TargetOpcode::G_MEMCPY ||
Opc == TargetOpcode::G_MEMMOVE ||
11141 Opc == TargetOpcode::G_MEMSET) &&
11142 "Expected memcpy like instruction");
11144 auto MMOIt =
MI.memoperands_begin();
11149 auto [Dst, Src, Len] =
MI.getFirst3Regs();
11151 if (
Opc != TargetOpcode::G_MEMSET) {
11152 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
11153 MemOp = *(++MMOIt);
11154 SrcAlign =
MemOp->getBaseAlign();
11159 if (!LenVRegAndVal)
11161 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
11163 if (KnownLen == 0) {
11164 MI.eraseFromParent();
11168 if (MaxLen && KnownLen > MaxLen)
11171 bool IsVolatile =
MemOp->isVolatile();
11172 if (
Opc == TargetOpcode::G_MEMCPY) {
11173 auto &MF = *
MI.getParent()->getParent();
11176 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
11177 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
11180 if (
Opc == TargetOpcode::G_MEMMOVE)
11181 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
11182 if (
Opc == TargetOpcode::G_MEMSET)
11183 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getValueReg() const
Get the stored value register.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e. the number of whole bytes needed to represent the size in bits.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the added elements later.
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F32_TO_BF16(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar type as the original but with fewer elements.
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
@ Custom
The result value requires a custom uniformity check.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.