43#define DEBUG_TYPE "legalizer"
56static std::pair<int, int>
62 unsigned NumParts =
Size / NarrowSize;
63 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
66 if (LeftoverSize == 0)
71 if (LeftoverSize % EltSize != 0)
80 return std::make_pair(NumParts, NumLeftover);
88 switch (Ty.getSizeInBits()) {
129 auto Step = LI.getAction(
MI, MRI);
130 switch (Step.Action) {
145 return bitcast(
MI, Step.TypeIdx, Step.NewType);
148 return lower(
MI, Step.TypeIdx, Step.NewType);
157 return LI.legalizeCustom(*
this,
MI, LocObserver) ?
Legalized
165void LegalizerHelper::insertParts(
Register DstReg,
187 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
189 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
190 return mergeMixedSubvectors(DstReg, AllRegs);
196 extractGCDType(GCDRegs, GCDTy, PartReg);
197 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
198 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
203 LLT Ty = MRI.getType(
Reg);
211void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
214 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
215 appendVectorElts(AllElts, PartRegs[i]);
218 if (!MRI.getType(Leftover).isVector())
221 appendVectorElts(AllElts, Leftover);
223 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
229 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
231 const int StartIdx = Regs.
size();
232 const int NumResults =
MI.getNumOperands() - 1;
234 for (
int I = 0;
I != NumResults; ++
I)
235 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
240 LLT SrcTy = MRI.getType(SrcReg);
241 if (SrcTy == GCDTy) {
247 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
254 LLT SrcTy = MRI.getType(SrcReg);
256 extractGCDType(Parts, GCDTy, SrcReg);
260LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
262 unsigned PadStrategy) {
267 int NumOrigSrc = VRegs.
size();
273 if (NumOrigSrc < NumParts * NumSubParts) {
274 if (PadStrategy == TargetOpcode::G_ZEXT)
275 PadReg =
MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
276 else if (PadStrategy == TargetOpcode::G_ANYEXT)
277 PadReg =
MIRBuilder.buildUndef(GCDTy).getReg(0);
279 assert(PadStrategy == TargetOpcode::G_SEXT);
284 PadReg =
MIRBuilder.buildAShr(GCDTy, VRegs.
back(), ShiftAmt).getReg(0);
300 for (
int I = 0;
I != NumParts; ++
I) {
301 bool AllMergePartsArePadding =
true;
304 for (
int J = 0; J != NumSubParts; ++J) {
305 int Idx =
I * NumSubParts + J;
306 if (Idx >= NumOrigSrc) {
307 SubMerge[J] = PadReg;
311 SubMerge[J] = VRegs[Idx];
314 AllMergePartsArePadding =
false;
320 if (AllMergePartsArePadding && !AllPadReg) {
321 if (PadStrategy == TargetOpcode::G_ANYEXT)
322 AllPadReg =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
323 else if (PadStrategy == TargetOpcode::G_ZEXT)
324 AllPadReg =
MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
333 Remerge[
I] = AllPadReg;
337 if (NumSubParts == 1)
338 Remerge[
I] = SubMerge[0];
340 Remerge[
I] =
MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
343 if (AllMergePartsArePadding && !AllPadReg)
344 AllPadReg = Remerge[
I];
347 VRegs = std::move(Remerge);
351void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
353 LLT DstTy = MRI.getType(DstReg);
358 if (DstTy == LCMTy) {
359 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
363 auto Remerge =
MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
372 UnmergeDefs[0] = DstReg;
373 for (
unsigned I = 1;
I != NumDefs; ++
I)
374 UnmergeDefs[
I] = MRI.createGenericVirtualRegister(DstTy);
377 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
385#define RTLIBCASE_INT(LibcallPrefix) \
389 return RTLIB::LibcallPrefix##32; \
391 return RTLIB::LibcallPrefix##64; \
393 return RTLIB::LibcallPrefix##128; \
395 llvm_unreachable("unexpected size"); \
399#define RTLIBCASE(LibcallPrefix) \
403 return RTLIB::LibcallPrefix##32; \
405 return RTLIB::LibcallPrefix##64; \
407 return RTLIB::LibcallPrefix##80; \
409 return RTLIB::LibcallPrefix##128; \
411 llvm_unreachable("unexpected size"); \
416 case TargetOpcode::G_LROUND:
418 case TargetOpcode::G_LLROUND:
420 case TargetOpcode::G_MUL:
422 case TargetOpcode::G_SDIV:
424 case TargetOpcode::G_UDIV:
426 case TargetOpcode::G_SREM:
428 case TargetOpcode::G_UREM:
430 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
432 case TargetOpcode::G_FADD:
434 case TargetOpcode::G_FSUB:
436 case TargetOpcode::G_FMUL:
438 case TargetOpcode::G_FDIV:
440 case TargetOpcode::G_FEXP:
442 case TargetOpcode::G_FEXP2:
444 case TargetOpcode::G_FEXP10:
446 case TargetOpcode::G_FREM:
448 case TargetOpcode::G_FPOW:
450 case TargetOpcode::G_FPOWI:
452 case TargetOpcode::G_FMA:
454 case TargetOpcode::G_FSIN:
456 case TargetOpcode::G_FCOS:
458 case TargetOpcode::G_FTAN:
460 case TargetOpcode::G_FASIN:
462 case TargetOpcode::G_FACOS:
464 case TargetOpcode::G_FATAN:
466 case TargetOpcode::G_FATAN2:
468 case TargetOpcode::G_FSINH:
470 case TargetOpcode::G_FCOSH:
472 case TargetOpcode::G_FTANH:
474 case TargetOpcode::G_FSINCOS:
476 case TargetOpcode::G_FMODF:
478 case TargetOpcode::G_FLOG10:
480 case TargetOpcode::G_FLOG:
482 case TargetOpcode::G_FLOG2:
484 case TargetOpcode::G_FLDEXP:
486 case TargetOpcode::G_FCEIL:
488 case TargetOpcode::G_FFLOOR:
490 case TargetOpcode::G_FMINNUM:
492 case TargetOpcode::G_FMAXNUM:
494 case TargetOpcode::G_FMINIMUMNUM:
496 case TargetOpcode::G_FMAXIMUMNUM:
498 case TargetOpcode::G_FSQRT:
500 case TargetOpcode::G_FRINT:
502 case TargetOpcode::G_FNEARBYINT:
504 case TargetOpcode::G_INTRINSIC_TRUNC:
506 case TargetOpcode::G_INTRINSIC_ROUND:
508 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
510 case TargetOpcode::G_INTRINSIC_LRINT:
512 case TargetOpcode::G_INTRINSIC_LLRINT:
532 AttributeList CallerAttrs =
F.getAttributes();
533 if (AttrBuilder(
F.getContext(), CallerAttrs.getRetAttrs())
534 .removeAttribute(Attribute::NoAlias)
535 .removeAttribute(Attribute::NonNull)
540 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
541 CallerAttrs.hasRetAttr(Attribute::SExt))
552 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
559 if (!VReg.
isVirtual() || VReg !=
Next->getOperand(1).getReg())
567 if (Ret ==
MBB.instr_end() || !Ret->isReturn())
570 if (Ret->getNumImplicitOperands() != 1)
573 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
590 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
595 Info.OrigRet = Result;
598 (Result.Ty->isVoidTy() ||
599 Result.Ty ==
MIRBuilder.getMF().getFunction().getReturnType()) &&
607 if (
MI && Info.LoweredTailCall) {
608 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
618 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
619 "Expected instr following MI to be return or debug inst?");
622 Next->eraseFromParent();
623 }
while (
MI->getNextNode());
638 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(
Libcall);
639 if (LibcallImpl == RTLIB::Unsupported)
643 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
657 Args.push_back({MO.getReg(), OpType, 0});
676 unsigned AddrSpace =
DL.getAllocaAddrSpace();
694 if (LibcallResult != LegalizeResult::Legalized)
702 MIRBuilder.
buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
703 MIRBuilder.
buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
704 MI.eraseFromParent();
719 LLT DstTy = MRI.getType(DstFrac);
724 unsigned AddrSpace =
DL.getAllocaAddrSpace();
725 MachinePointerInfo PtrInfo;
734 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
737 if (LibcallResult != LegalizeResult::Legalized)
743 MIRBuilder.
buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
744 MI.eraseFromParent();
755 case TargetOpcode::G_FPEXT:
757 case TargetOpcode::G_FPTRUNC:
759 case TargetOpcode::G_FPTOSI:
761 case TargetOpcode::G_FPTOUI:
763 case TargetOpcode::G_SITOFP:
765 case TargetOpcode::G_UITOFP:
775 if (FromType->isIntegerTy()) {
776 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
777 Arg.
Flags[0].setSExt();
779 Arg.
Flags[0].setZExt();
790 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
794 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
798 LLT OpLLT = MRI.getType(Reg);
799 Type *OpTy =
nullptr;
804 Args.push_back({Reg, OpTy, 0});
807 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
808 RTLIB::Libcall RTLibcall;
809 unsigned Opc =
MI.getOpcode();
811 case TargetOpcode::G_BZERO:
812 RTLibcall = RTLIB::BZERO;
814 case TargetOpcode::G_MEMCPY:
815 RTLibcall = RTLIB::MEMCPY;
816 Args[0].Flags[0].setReturned();
818 case TargetOpcode::G_MEMMOVE:
819 RTLibcall = RTLIB::MEMMOVE;
820 Args[0].Flags[0].setReturned();
822 case TargetOpcode::G_MEMSET:
823 RTLibcall = RTLIB::MEMSET;
824 Args[0].Flags[0].setReturned();
833 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
836 if (RTLibcallImpl == RTLIB::Unsupported) {
843 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
850 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
857 if (Info.LoweredTailCall) {
858 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
868 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
869 "Expected instr following MI to be return or debug inst?");
872 Next->eraseFromParent();
873 }
while (
MI.getNextNode());
883 unsigned Opc =
MI.getOpcode();
885 auto &MMO = AtomicMI.getMMO();
886 auto Ordering = MMO.getMergedOrdering();
887 LLT MemType = MMO.getMemoryType();
890 return RTLIB::UNKNOWN_LIBCALL;
892#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
894 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
896 case TargetOpcode::G_ATOMIC_CMPXCHG:
897 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
898 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
899 return getOutlineAtomicHelper(LC, Ordering, MemSize);
901 case TargetOpcode::G_ATOMICRMW_XCHG: {
902 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
903 return getOutlineAtomicHelper(LC, Ordering, MemSize);
905 case TargetOpcode::G_ATOMICRMW_ADD:
906 case TargetOpcode::G_ATOMICRMW_SUB: {
907 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
908 return getOutlineAtomicHelper(LC, Ordering, MemSize);
910 case TargetOpcode::G_ATOMICRMW_AND: {
911 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
912 return getOutlineAtomicHelper(LC, Ordering, MemSize);
914 case TargetOpcode::G_ATOMICRMW_OR: {
915 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
916 return getOutlineAtomicHelper(LC, Ordering, MemSize);
918 case TargetOpcode::G_ATOMICRMW_XOR: {
919 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
920 return getOutlineAtomicHelper(LC, Ordering, MemSize);
923 return RTLIB::UNKNOWN_LIBCALL;
936 unsigned Opc =
MI.getOpcode();
938 case TargetOpcode::G_ATOMIC_CMPXCHG:
939 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
942 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
943 MI.getFirst4RegLLTs();
946 if (
Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
947 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
948 NewLLT) =
MI.getFirst5RegLLTs();
958 case TargetOpcode::G_ATOMICRMW_XCHG:
959 case TargetOpcode::G_ATOMICRMW_ADD:
960 case TargetOpcode::G_ATOMICRMW_SUB:
961 case TargetOpcode::G_ATOMICRMW_AND:
962 case TargetOpcode::G_ATOMICRMW_OR:
963 case TargetOpcode::G_ATOMICRMW_XOR: {
964 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
967 if (
Opc == TargetOpcode::G_ATOMICRMW_AND)
971 else if (
Opc == TargetOpcode::G_ATOMICRMW_SUB)
986 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
988 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
991 if (RTLibcallImpl == RTLIB::Unsupported) {
998 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1012static RTLIB::Libcall
1014 RTLIB::Libcall RTLibcall;
1015 switch (
MI.getOpcode()) {
1016 case TargetOpcode::G_GET_FPENV:
1017 RTLibcall = RTLIB::FEGETENV;
1019 case TargetOpcode::G_SET_FPENV:
1020 case TargetOpcode::G_RESET_FPENV:
1021 RTLibcall = RTLIB::FESETENV;
1023 case TargetOpcode::G_GET_FPMODE:
1024 RTLibcall = RTLIB::FEGETMODE;
1026 case TargetOpcode::G_SET_FPMODE:
1027 case TargetOpcode::G_RESET_FPMODE:
1028 RTLibcall = RTLIB::FESETMODE;
1060 LLT StateTy = MRI.getType(Dst);
1063 MachinePointerInfo TempPtrInfo;
1067 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1072 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1080 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1098 LLT StateTy = MRI.getType(Src);
1101 MachinePointerInfo TempPtrInfo;
1110 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1115 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1116 LocObserver,
nullptr);
1122static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1124#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1128 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1130 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1132 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1134 llvm_unreachable("unexpected size"); \
1165 LLT OpLLT = MRI.getType(
Cmp->getLHSReg());
1168 OpLLT != MRI.getType(
Cmp->getRHSReg()))
1175 LLT DstTy = MRI.getType(DstReg);
1176 const auto Cond =
Cmp->getCond();
1181 const auto BuildLibcall = [&](
const RTLIB::Libcall
Libcall,
1186 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1190 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1197 .buildICmp(ICmpPred, Res, Temp,
MIRBuilder.buildConstant(TempLLT, 0))
1203 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1205 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1218 const auto [OeqLibcall, OeqPred] =
1220 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1222 const auto [UnoLibcall, UnoPred] =
1224 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1239 const auto [OeqLibcall, OeqPred] =
1244 const auto [UnoLibcall, UnoPred] =
1249 if (NotOeq && NotUno)
1268 const auto [InversedLibcall, InversedPred] =
1270 if (!BuildLibcall(InversedLibcall,
1295 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1297 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1300 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1306 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &
MI);
1311 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
1313 switch (
MI.getOpcode()) {
1316 case TargetOpcode::G_MUL:
1317 case TargetOpcode::G_SDIV:
1318 case TargetOpcode::G_UDIV:
1319 case TargetOpcode::G_SREM:
1320 case TargetOpcode::G_UREM:
1321 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1322 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1330 case TargetOpcode::G_FADD:
1331 case TargetOpcode::G_FSUB:
1332 case TargetOpcode::G_FMUL:
1333 case TargetOpcode::G_FDIV:
1334 case TargetOpcode::G_FMA:
1335 case TargetOpcode::G_FPOW:
1336 case TargetOpcode::G_FREM:
1337 case TargetOpcode::G_FCOS:
1338 case TargetOpcode::G_FSIN:
1339 case TargetOpcode::G_FTAN:
1340 case TargetOpcode::G_FACOS:
1341 case TargetOpcode::G_FASIN:
1342 case TargetOpcode::G_FATAN:
1343 case TargetOpcode::G_FATAN2:
1344 case TargetOpcode::G_FCOSH:
1345 case TargetOpcode::G_FSINH:
1346 case TargetOpcode::G_FTANH:
1347 case TargetOpcode::G_FLOG10:
1348 case TargetOpcode::G_FLOG:
1349 case TargetOpcode::G_FLOG2:
1350 case TargetOpcode::G_FEXP:
1351 case TargetOpcode::G_FEXP2:
1352 case TargetOpcode::G_FEXP10:
1353 case TargetOpcode::G_FCEIL:
1354 case TargetOpcode::G_FFLOOR:
1355 case TargetOpcode::G_FMINNUM:
1356 case TargetOpcode::G_FMAXNUM:
1357 case TargetOpcode::G_FMINIMUMNUM:
1358 case TargetOpcode::G_FMAXIMUMNUM:
1359 case TargetOpcode::G_FSQRT:
1360 case TargetOpcode::G_FRINT:
1361 case TargetOpcode::G_FNEARBYINT:
1362 case TargetOpcode::G_INTRINSIC_TRUNC:
1363 case TargetOpcode::G_INTRINSIC_ROUND:
1364 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1365 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1369 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1377 case TargetOpcode::G_FSINCOS: {
1378 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1382 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1387 case TargetOpcode::G_FMODF: {
1388 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1392 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1397 case TargetOpcode::G_LROUND:
1398 case TargetOpcode::G_LLROUND:
1399 case TargetOpcode::G_INTRINSIC_LRINT:
1400 case TargetOpcode::G_INTRINSIC_LLRINT: {
1401 LLT LLTy = MRI.getType(
MI.getOperand(1).getReg());
1405 Ctx, MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits());
1407 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1413 {{
MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &
MI);
1416 MI.eraseFromParent();
1419 case TargetOpcode::G_FPOWI:
1420 case TargetOpcode::G_FLDEXP: {
1421 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1425 Ctx, MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
1427 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1432 {
MI.getOperand(1).getReg(), HLTy, 0},
1433 {
MI.getOperand(2).getReg(), ITy, 1}};
1434 Args[1].Flags[0].setSExt();
1436 Libcall, {
MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &
MI);
1441 case TargetOpcode::G_FPEXT:
1442 case TargetOpcode::G_FPTRUNC: {
1445 if (!FromTy || !ToTy)
1452 case TargetOpcode::G_FCMP: {
1456 MI.eraseFromParent();
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1464 unsigned ToSize = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1468 FromTy, LocObserver);
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1480 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1487 case TargetOpcode::G_ATOMICRMW_XCHG:
1488 case TargetOpcode::G_ATOMICRMW_ADD:
1489 case TargetOpcode::G_ATOMICRMW_SUB:
1490 case TargetOpcode::G_ATOMICRMW_AND:
1491 case TargetOpcode::G_ATOMICRMW_OR:
1492 case TargetOpcode::G_ATOMICRMW_XOR:
1493 case TargetOpcode::G_ATOMIC_CMPXCHG:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1500 case TargetOpcode::G_BZERO:
1501 case TargetOpcode::G_MEMCPY:
1502 case TargetOpcode::G_MEMMOVE:
1503 case TargetOpcode::G_MEMSET: {
1508 MI.eraseFromParent();
1511 case TargetOpcode::G_GET_FPENV:
1512 case TargetOpcode::G_GET_FPMODE: {
1518 case TargetOpcode::G_SET_FPENV:
1519 case TargetOpcode::G_SET_FPMODE: {
1525 case TargetOpcode::G_RESET_FPENV:
1526 case TargetOpcode::G_RESET_FPMODE: {
1534 MI.eraseFromParent();
1541 uint64_t SizeOp0 = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1544 switch (
MI.getOpcode()) {
1547 case TargetOpcode::G_IMPLICIT_DEF: {
1549 LLT DstTy = MRI.getType(DstReg);
1557 if (SizeOp0 % NarrowSize != 0) {
1562 MI.eraseFromParent();
1566 int NumParts = SizeOp0 / NarrowSize;
1569 for (
int i = 0; i < NumParts; ++i)
1573 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1575 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1576 MI.eraseFromParent();
1579 case TargetOpcode::G_CONSTANT: {
1580 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1581 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1582 unsigned TotalSize = Ty.getSizeInBits();
1584 int NumParts = TotalSize / NarrowSize;
1587 for (
int I = 0;
I != NumParts; ++
I) {
1588 unsigned Offset =
I * NarrowSize;
1595 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1597 if (LeftoverBits != 0) {
1601 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1605 insertParts(
MI.getOperand(0).getReg(),
1606 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1608 MI.eraseFromParent();
1611 case TargetOpcode::G_SEXT:
1612 case TargetOpcode::G_ZEXT:
1613 case TargetOpcode::G_ANYEXT:
1615 case TargetOpcode::G_TRUNC: {
1619 uint64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1621 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1625 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
1626 MIRBuilder.buildCopy(
MI.getOperand(0), Unmerge.getReg(0));
1627 MI.eraseFromParent();
1630 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1631 case TargetOpcode::G_FREEZE: {
1635 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1640 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1).getReg());
1642 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1644 MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1648 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), Parts);
1649 MI.eraseFromParent();
1652 case TargetOpcode::G_ADD:
1653 case TargetOpcode::G_SUB:
1654 case TargetOpcode::G_SADDO:
1655 case TargetOpcode::G_SSUBO:
1656 case TargetOpcode::G_SADDE:
1657 case TargetOpcode::G_SSUBE:
1658 case TargetOpcode::G_UADDO:
1659 case TargetOpcode::G_USUBO:
1660 case TargetOpcode::G_UADDE:
1661 case TargetOpcode::G_USUBE:
1663 case TargetOpcode::G_MUL:
1664 case TargetOpcode::G_UMULH:
1666 case TargetOpcode::G_EXTRACT:
1668 case TargetOpcode::G_INSERT:
1670 case TargetOpcode::G_LOAD: {
1672 Register DstReg = LoadMI.getDstReg();
1673 LLT DstTy = MRI.getType(DstReg);
1677 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1678 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1679 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1681 LoadMI.eraseFromParent();
1687 case TargetOpcode::G_ZEXTLOAD:
1688 case TargetOpcode::G_SEXTLOAD: {
1690 Register DstReg = LoadMI.getDstReg();
1691 Register PtrReg = LoadMI.getPointerReg();
1693 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1694 auto &MMO = LoadMI.getMMO();
1697 if (MemSize == NarrowSize) {
1699 }
else if (MemSize < NarrowSize) {
1700 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1701 }
else if (MemSize > NarrowSize) {
1711 LoadMI.eraseFromParent();
1714 case TargetOpcode::G_STORE: {
1717 Register SrcReg = StoreMI.getValueReg();
1718 LLT SrcTy = MRI.getType(SrcReg);
1719 if (SrcTy.isVector())
1722 int NumParts = SizeOp0 / NarrowSize;
1724 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1725 if (SrcTy.isVector() && LeftoverBits != 0)
1728 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1729 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1731 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1732 StoreMI.eraseFromParent();
1738 case TargetOpcode::G_SELECT:
1740 case TargetOpcode::G_AND:
1741 case TargetOpcode::G_OR:
1742 case TargetOpcode::G_XOR: {
1754 case TargetOpcode::G_SHL:
1755 case TargetOpcode::G_LSHR:
1756 case TargetOpcode::G_ASHR:
1758 case TargetOpcode::G_CTLZ:
1759 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1760 case TargetOpcode::G_CTTZ:
1761 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1762 case TargetOpcode::G_CTLS:
1763 case TargetOpcode::G_CTPOP:
1765 switch (
MI.getOpcode()) {
1766 case TargetOpcode::G_CTLZ:
1767 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1769 case TargetOpcode::G_CTTZ:
1770 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1772 case TargetOpcode::G_CTPOP:
1774 case TargetOpcode::G_CTLS:
1784 case TargetOpcode::G_INTTOPTR:
1792 case TargetOpcode::G_PTRTOINT:
1800 case TargetOpcode::G_PHI: {
1803 if (SizeOp0 % NarrowSize != 0)
1806 unsigned NumParts = SizeOp0 / NarrowSize;
1810 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1818 for (
unsigned i = 0; i < NumParts; ++i) {
1819 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1821 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1822 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1823 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1826 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
1828 MI.eraseFromParent();
1831 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1832 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1836 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1842 case TargetOpcode::G_ICMP: {
1844 LLT SrcTy = MRI.getType(LHS);
1850 if (!
extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1856 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1857 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1863 LLT ResTy = MRI.getType(Dst);
1868 auto Zero =
MIRBuilder.buildConstant(NarrowTy, 0);
1870 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1871 auto LHS = std::get<0>(LHSAndRHS);
1872 auto RHS = std::get<1>(LHSAndRHS);
1873 auto Xor =
MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1880 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1881 auto LHS = std::get<0>(LHSAndRHS);
1882 auto RHS = std::get<1>(LHSAndRHS);
1883 auto Xor =
MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1884 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1885 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1886 TargetOpcode::G_ZEXT);
1893 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1894 auto Or =
MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1895 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1900 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1904 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1909 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1913 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[
I],
1916 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[
I],
1919 LHSPartRegs[
I], RHSPartRegs[
I]);
1920 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1926 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[
I],
1940 RHSLeftoverRegs[
I]);
1942 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[
I],
1943 RHSLeftoverRegs[
I]);
1946 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1947 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1953 MI.eraseFromParent();
1956 case TargetOpcode::G_FCMP:
1965 case TargetOpcode::G_SEXT_INREG: {
1969 int64_t SizeInBits =
MI.getOperand(2).getImm();
1978 auto TruncMIB =
MIRBuilder.buildTrunc(NarrowTy, MO1);
1979 MO1.
setReg(TruncMIB.getReg(0));
1982 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1994 if (SizeOp0 % NarrowSize != 0)
1996 int NumParts = SizeOp0 / NarrowSize;
2004 for (
int i = 0; i < NumParts; ++i) {
2005 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2020 for (
int i = 0; i < NumParts; ++i) {
2023 PartialExtensionReg = DstRegs.
back();
2025 assert(PartialExtensionReg &&
2026 "Expected to visit partial extension before full");
2027 if (FullExtensionReg) {
2032 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2034 FullExtensionReg = DstRegs.
back();
2039 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2042 PartialExtensionReg = DstRegs.
back();
2048 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2049 MI.eraseFromParent();
2052 case TargetOpcode::G_BSWAP:
2053 case TargetOpcode::G_BITREVERSE: {
2054 if (SizeOp0 % NarrowSize != 0)
2059 unsigned NumParts = SizeOp0 / NarrowSize;
2060 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2063 for (
unsigned i = 0; i < NumParts; ++i) {
2064 auto DstPart =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
2065 {SrcRegs[NumParts - 1 - i]});
2069 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
2072 MI.eraseFromParent();
2075 case TargetOpcode::G_PTR_ADD:
2076 case TargetOpcode::G_PTRMASK: {
2084 case TargetOpcode::G_FPTOUI:
2085 case TargetOpcode::G_FPTOSI:
2086 case TargetOpcode::G_FPTOUI_SAT:
2087 case TargetOpcode::G_FPTOSI_SAT:
2089 case TargetOpcode::G_FPEXT:
2096 case TargetOpcode::G_FLDEXP:
2097 case TargetOpcode::G_STRICT_FLDEXP:
2099 case TargetOpcode::G_VSCALE: {
2101 LLT Ty = MRI.getType(Dst);
2105 auto VScaleBase =
MIRBuilder.buildVScale(NarrowTy, One);
2106 auto ZExt =
MIRBuilder.buildZExt(Ty, VScaleBase);
2107 auto C =
MIRBuilder.buildConstant(Ty, *
MI.getOperand(1).getCImm());
2110 MI.eraseFromParent();
2117 LLT Ty = MRI.getType(Val);
2123 if (Ty.isPointer()) {
2124 if (
DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2126 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2132 if (Ty.isPointerVector())
2133 NewVal =
MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2134 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2138 unsigned OpIdx,
unsigned ExtOpcode) {
2140 auto ExtB =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2141 MO.
setReg(ExtB.getReg(0));
2147 auto ExtB =
MIRBuilder.buildTrunc(NarrowTy, MO);
2148 MO.
setReg(ExtB.getReg(0));
2152 unsigned OpIdx,
unsigned TruncOpcode) {
2154 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2156 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2161 unsigned OpIdx,
unsigned ExtOpcode) {
2163 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2165 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2174 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2176 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2182 MO.
setReg(
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2192 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2199LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2204 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
2205 if (DstTy.isVector())
2210 const int SrcSize = SrcTy.getSizeInBits();
2212 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2214 unsigned NumOps =
MI.getNumOperands();
2215 unsigned NumSrc =
MI.getNumOperands() - 1;
2216 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2218 if (WideSize >= DstSize) {
2222 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
2223 const unsigned Offset = (
I - 1) * PartSize;
2236 ResultReg = NextResult;
2239 if (WideSize > DstSize)
2241 else if (DstTy.isPointer())
2244 MI.eraseFromParent();
2269 const int GCD = std::gcd(SrcSize, WideSize);
2279 if (GCD == SrcSize) {
2282 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2283 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2289 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2291 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
2295 const int PartsPerGCD = WideSize / GCD;
2299 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2301 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2308 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2310 auto FinalMerge =
MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2311 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2314 MI.eraseFromParent();
// widenScalarUnmergeValues: legalizes G_UNMERGE_VALUES by widening a scalar
// type (TypeIdx selects source vs. destinations).
// NOTE(review): this chunk is a lossy, line-wrapped extraction -- the embedded
// original line numbers (2319, 2324, 2325, 2330, ...) skip, so statements,
// braces, and control flow are missing between the lines kept here.  Code is
// left byte-identical; comments only.  Confirm details against upstream
// llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp.
2319LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
// All operands except the last are destinations; the last is the one source.
2324 int NumDst =
MI.getNumOperands() - 1;
2325 Register SrcReg =
MI.getOperand(NumDst).getReg();
2326 LLT SrcTy = MRI.getType(SrcReg);
2330 Register Dst0Reg =
MI.getOperand(0).getReg();
2331 LLT DstTy = MRI.getType(Dst0Reg);
// Pointer sources in non-integral address spaces cannot be converted to
// integers (debug message below); otherwise cast pointer -> integer.
2340 dbgs() <<
"Not casting non-integral address space integer\n");
2345 SrcReg =
MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
// Widen the source, then peel off each destination by shifting right by
// I * DstSize; the truncation of each shifted value is elided in this
// extraction -- TODO confirm against upstream.
2353 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2361 for (
int I = 1;
I != NumDst; ++
I) {
2362 auto ShiftAmt =
MIRBuilder.buildConstant(SrcTy, DstSize *
I);
2363 auto Shr =
MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2367 MI.eraseFromParent();
// Fallback path: widening pointer source types is not implemented here.
2378 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
// Extend the source to an LCM-sized type, then unmerge it into WideTy pieces.
2382 WideSrc =
MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2385 auto Unmerge =
MIRBuilder.buildUnmerge(WideTy, WideSrc);
2403 const int NumUnmerge = Unmerge->getNumOperands() - 1;
// Simple case: each widened piece unmerges directly into the original DstTy
// destinations; extra defs past NumDst get fresh (presumably dead) registers.
2408 if (PartsPerRemerge == 1) {
2411 for (
int I = 0;
I != NumUnmerge; ++
I) {
2412 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2414 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2415 int Idx =
I * PartsPerUnmerge + J;
2417 MIB.addDef(
MI.getOperand(Idx).getReg());
2420 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2424 MIB.addUse(Unmerge.getReg(
I));
// General case: break every widened piece down to the GCD type, then
// re-merge PartsPerRemerge GCD pieces per original destination register.
2427 SmallVector<Register, 16> Parts;
2428 for (
int J = 0; J != NumUnmerge; ++J)
2429 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2432 for (
int I = 0;
I != NumDst; ++
I) {
2433 for (
int J = 0; J < PartsPerRemerge; ++J) {
2434 const int Idx =
I * PartsPerRemerge + J;
2438 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(
I).getReg(), RemergeParts);
2439 RemergeParts.
clear();
2443 MI.eraseFromParent();
// widenScalarExtract: legalizes G_EXTRACT by widening a scalar type.
// NOTE(review): lossy extraction -- original lines between the kept numbers
// (2449, 2452-2465, 2467-2469, ...) are missing.  Code byte-identical;
// comments only.
2448LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2450 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
// Bit offset (immediate operand 2) of the extracted field within the source.
2451 unsigned Offset =
MI.getOperand(2).getImm();
2454 if (SrcTy.
isVector() || DstTy.isVector())
// Pointer source: reinterpret as a same-sized integer before extracting.
2466 Src =
MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2470 if (DstTy.isPointer())
2477 MI.eraseFromParent();
// Scalar path: presumably shifts the field down to bit 0 in ShiftTy -- the
// shift itself is elided here; confirm against upstream.
2482 LLT ShiftTy = SrcTy;
2491 MI.eraseFromParent();
// widenScalarInsert: legalizes G_INSERT; this guard rejects anything other
// than TypeIdx 0 with a scalar wide type.  The rest of the body is elided in
// this extraction -- confirm against upstream.
2522LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2524 if (TypeIdx != 0 || WideTy.
isVector())
// widenScalarAddSubOverflow: legalizes the add/sub-with-overflow opcodes by
// performing the arithmetic in WideTy and re-deriving the overflow bit.
// NOTE(review): lossy extraction -- the 'unsigned Opcode; unsigned ExtOpcode;'
// declarations, 'break's, the default case, and the overflow-compare code
// (around original lines 2608-2611) are elided.  Code byte-identical;
// comments only.
2534LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
// Optional carry/borrow input -- only set for the *E (extend) variants.
2538 std::optional<Register> CarryIn;
// Map each overflow opcode to a plain wide operation plus the extension kind
// that preserves its signedness semantics.
2539 switch (
MI.getOpcode()) {
2542 case TargetOpcode::G_SADDO:
2543 Opcode = TargetOpcode::G_ADD;
2544 ExtOpcode = TargetOpcode::G_SEXT;
2546 case TargetOpcode::G_SSUBO:
2547 Opcode = TargetOpcode::G_SUB;
2548 ExtOpcode = TargetOpcode::G_SEXT;
2550 case TargetOpcode::G_UADDO:
2551 Opcode = TargetOpcode::G_ADD;
2552 ExtOpcode = TargetOpcode::G_ZEXT;
2554 case TargetOpcode::G_USUBO:
2555 Opcode = TargetOpcode::G_SUB;
2556 ExtOpcode = TargetOpcode::G_ZEXT;
// Carry-in variants stay carry-propagating: signed forms reuse the unsigned
// carry opcodes (G_UADDE/G_USUBE) but sign-extend the inputs.
2558 case TargetOpcode::G_SADDE:
2559 Opcode = TargetOpcode::G_UADDE;
2560 ExtOpcode = TargetOpcode::G_SEXT;
2561 CarryIn =
MI.getOperand(4).getReg();
2563 case TargetOpcode::G_SSUBE:
2564 Opcode = TargetOpcode::G_USUBE;
2565 ExtOpcode = TargetOpcode::G_SEXT;
2566 CarryIn =
MI.getOperand(4).getReg();
2568 case TargetOpcode::G_UADDE:
2569 Opcode = TargetOpcode::G_UADDE;
2570 ExtOpcode = TargetOpcode::G_ZEXT;
2571 CarryIn =
MI.getOperand(4).getReg();
2573 case TargetOpcode::G_USUBE:
2574 Opcode = TargetOpcode::G_USUBE;
2575 ExtOpcode = TargetOpcode::G_ZEXT;
2576 CarryIn =
MI.getOperand(4).getReg();
// Extend both value operands (2 and 3) into WideTy.
2592 auto LHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(2)});
2593 auto RHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(3)});
// With a carry-in, build the carry-propagating instruction (carry-out type
// taken from the original overflow def, operand 1).
2597 LLT CarryOutTy = MRI.getType(
MI.getOperand(1).getReg());
2599 .buildInstr(Opcode, {WideTy, CarryOutTy},
2600 {LHSExt, RHSExt, *CarryIn})
2603 NewOp =
MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).
getReg(0);
// Overflow detection: truncate the wide result back to the original type and
// re-extend; the comparison against NewOp is elided here -- confirm upstream.
2605 LLT OrigTy = MRI.getType(
MI.getOperand(0).getReg());
2606 auto TruncOp =
MIRBuilder.buildTrunc(OrigTy, NewOp);
2607 auto ExtOp =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2612 MI.eraseFromParent();
// widenScalarAddSubShlSat: legalizes the saturating add/sub/shift opcodes by
// pre-shifting the operands into the top bits of WideTy so the widened
// saturating operation clips at the same boundary, then shifting back down.
// NOTE(review): lossy extraction -- the ShiftL/ShiftR construction and the
// signed half of the final shift-down (the '?' branch before ':') are elided.
// Code byte-identical; comments only.
2617LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
// Signedness and shift-ness are derived purely from the opcode.
2619 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2620 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2621 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2622 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2623 MI.getOpcode() == TargetOpcode::G_USHLSAT;
// Number of bits to move the operands up so saturation happens at the
// original width's boundary.
2636 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2643 auto ShiftK =
MIRBuilder.buildConstant(WideTy, SHLAmount);
// Re-emit the same saturating opcode in WideTy, preserving MI flags.
2647 auto WideInst =
MIRBuilder.buildInstr(
MI.getOpcode(), {WideTy},
2648 {ShiftL, ShiftR},
MI.getFlags());
// Shift the wide result back down (logical shift on this visible branch; the
// arithmetic-shift signed branch is elided).
2653 :
MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2656 MI.eraseFromParent();
// widenScalarMulo: legalizes G_SMULO/G_UMULO by multiplying in WideTy and
// re-deriving the overflow flag.
// NOTE(review): lossy extraction -- operand destructuring (LHS/RHS/
// OriginalOverflow), the WideMulCanOverflow computation, and the final
// overflow compare/copy are elided.  Code byte-identical; comments only.
2661LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2670 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2672 LLT SrcTy = MRI.getType(
LHS);
2673 LLT OverflowTy = MRI.getType(OriginalOverflow);
// Extend the inputs with the signedness-matching extension.
2680 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2681 auto LeftOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
LHS});
2682 auto RightOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
RHS});
// If even the widened multiply can overflow, keep the overflow opcode;
// otherwise a plain G_MUL suffices.
2689 WideMulCanOverflow ?
MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2691 MachineInstrBuilder Mulo;
2692 if (WideMulCanOverflow)
2693 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2694 {LeftOperand, RightOperand});
2696 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
// Re-extend the low SrcBitWidth bits of the wide product; comparing this to
// the product detects overflow at the original width (compare elided here).
2701 MachineInstrBuilder ExtResult;
2708 ExtResult =
MIRBuilder.buildSExtInReg(WideTy,
Mul, SrcBitWidth);
2712 ExtResult =
MIRBuilder.buildZExtInReg(WideTy,
Mul, SrcBitWidth);
2715 if (WideMulCanOverflow) {
2723 MI.eraseFromParent();
2729 unsigned Opcode =
MI.getOpcode();
2733 case TargetOpcode::G_ATOMICRMW_XCHG:
2734 case TargetOpcode::G_ATOMICRMW_ADD:
2735 case TargetOpcode::G_ATOMICRMW_SUB:
2736 case TargetOpcode::G_ATOMICRMW_AND:
2737 case TargetOpcode::G_ATOMICRMW_OR:
2738 case TargetOpcode::G_ATOMICRMW_XOR:
2739 case TargetOpcode::G_ATOMICRMW_MIN:
2740 case TargetOpcode::G_ATOMICRMW_MAX:
2741 case TargetOpcode::G_ATOMICRMW_UMIN:
2742 case TargetOpcode::G_ATOMICRMW_UMAX:
2743 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2749 case TargetOpcode::G_ATOMIC_CMPXCHG:
2750 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2757 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2767 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2772 case TargetOpcode::G_EXTRACT:
2773 return widenScalarExtract(
MI, TypeIdx, WideTy);
2774 case TargetOpcode::G_INSERT:
2775 return widenScalarInsert(
MI, TypeIdx, WideTy);
2776 case TargetOpcode::G_MERGE_VALUES:
2777 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2778 case TargetOpcode::G_UNMERGE_VALUES:
2779 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2780 case TargetOpcode::G_SADDO:
2781 case TargetOpcode::G_SSUBO:
2782 case TargetOpcode::G_UADDO:
2783 case TargetOpcode::G_USUBO:
2784 case TargetOpcode::G_SADDE:
2785 case TargetOpcode::G_SSUBE:
2786 case TargetOpcode::G_UADDE:
2787 case TargetOpcode::G_USUBE:
2788 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2789 case TargetOpcode::G_UMULO:
2790 case TargetOpcode::G_SMULO:
2791 return widenScalarMulo(
MI, TypeIdx, WideTy);
2792 case TargetOpcode::G_SADDSAT:
2793 case TargetOpcode::G_SSUBSAT:
2794 case TargetOpcode::G_SSHLSAT:
2795 case TargetOpcode::G_UADDSAT:
2796 case TargetOpcode::G_USUBSAT:
2797 case TargetOpcode::G_USHLSAT:
2798 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2799 case TargetOpcode::G_CTTZ:
2800 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2801 case TargetOpcode::G_CTLZ:
2802 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2803 case TargetOpcode::G_CTLS:
2804 case TargetOpcode::G_CTPOP: {
2817 case TargetOpcode::G_CTTZ:
2818 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2819 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2820 ExtOpc = TargetOpcode::G_ANYEXT;
2822 case TargetOpcode::G_CTLS:
2823 ExtOpc = TargetOpcode::G_SEXT;
2826 ExtOpc = TargetOpcode::G_ZEXT;
2829 auto MIBSrc =
MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2830 LLT CurTy = MRI.getType(SrcReg);
2831 unsigned NewOpc = Opcode;
2832 if (NewOpc == TargetOpcode::G_CTTZ) {
2839 WideTy, MIBSrc,
MIRBuilder.buildConstant(WideTy, TopBit));
2841 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2847 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2857 auto MIBNewOp =
MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2859 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2864 WideTy, MIBNewOp,
MIRBuilder.buildConstant(WideTy, SizeDiff),
2865 Opcode == TargetOpcode::G_CTLZ
2870 MIRBuilder.buildZExtOrTrunc(
MI.getOperand(0), MIBNewOp);
2871 MI.eraseFromParent();
2874 case TargetOpcode::G_BSWAP: {
2878 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2879 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2880 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2883 MI.getOperand(0).setReg(DstExt);
2887 LLT Ty = MRI.getType(DstReg);
2889 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2890 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2896 case TargetOpcode::G_BITREVERSE: {
2900 LLT Ty = MRI.getType(DstReg);
2903 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2905 MI.getOperand(0).setReg(DstExt);
2908 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, DiffBits);
2909 auto Shift =
MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2914 case TargetOpcode::G_FREEZE:
2915 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2922 case TargetOpcode::G_ABS:
2929 case TargetOpcode::G_ADD:
2930 case TargetOpcode::G_AND:
2931 case TargetOpcode::G_MUL:
2932 case TargetOpcode::G_OR:
2933 case TargetOpcode::G_XOR:
2934 case TargetOpcode::G_SUB:
2935 case TargetOpcode::G_SHUFFLE_VECTOR:
2946 case TargetOpcode::G_SBFX:
2947 case TargetOpcode::G_UBFX:
2961 case TargetOpcode::G_SHL:
2977 case TargetOpcode::G_ROTR:
2978 case TargetOpcode::G_ROTL:
2987 case TargetOpcode::G_SDIV:
2988 case TargetOpcode::G_SREM:
2989 case TargetOpcode::G_SMIN:
2990 case TargetOpcode::G_SMAX:
2991 case TargetOpcode::G_ABDS:
2999 case TargetOpcode::G_SDIVREM:
3009 case TargetOpcode::G_ASHR:
3010 case TargetOpcode::G_LSHR:
3014 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3015 : TargetOpcode::G_ZEXT;
3028 case TargetOpcode::G_UDIV:
3029 case TargetOpcode::G_UREM:
3030 case TargetOpcode::G_ABDU:
3037 case TargetOpcode::G_UDIVREM:
3046 case TargetOpcode::G_UMIN:
3047 case TargetOpcode::G_UMAX: {
3048 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3050 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3054 ? TargetOpcode::G_SEXT
3055 : TargetOpcode::G_ZEXT;
3065 case TargetOpcode::G_SELECT:
3075 bool IsVec = MRI.getType(
MI.getOperand(1).getReg()).isVector();
3082 case TargetOpcode::G_FPEXT:
3090 case TargetOpcode::G_FPTOSI:
3091 case TargetOpcode::G_FPTOUI:
3092 case TargetOpcode::G_INTRINSIC_LRINT:
3093 case TargetOpcode::G_INTRINSIC_LLRINT:
3094 case TargetOpcode::G_IS_FPCLASS:
3104 case TargetOpcode::G_SITOFP:
3114 case TargetOpcode::G_UITOFP:
3124 case TargetOpcode::G_FPTOSI_SAT:
3125 case TargetOpcode::G_FPTOUI_SAT:
3130 LLT Ty = MRI.getType(OldDst);
3131 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3133 MI.getOperand(0).setReg(ExtReg);
3134 uint64_t ShortBits = Ty.getScalarSizeInBits();
3137 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3148 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3149 NewDst =
MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3157 NewDst =
MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3165 case TargetOpcode::G_LOAD:
3166 case TargetOpcode::G_SEXTLOAD:
3167 case TargetOpcode::G_ZEXTLOAD:
3173 case TargetOpcode::G_STORE: {
3177 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3178 assert(!Ty.isPointerOrPointerVector() &&
"Can't widen type");
3179 if (!Ty.isScalar()) {
3187 MI.setMemRefs(MF, {NewMMO});
3194 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3195 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3201 case TargetOpcode::G_CONSTANT: {
3204 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3205 MRI.getType(
MI.getOperand(0).getReg()));
3206 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3207 ExtOpc == TargetOpcode::G_ANYEXT) &&
3210 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3214 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3220 case TargetOpcode::G_FCONSTANT: {
3226 auto IntCst =
MIRBuilder.buildConstant(
MI.getOperand(0).getReg(), Val);
3228 MI.eraseFromParent();
3231 case TargetOpcode::G_IMPLICIT_DEF: {
3237 case TargetOpcode::G_BRCOND:
3243 case TargetOpcode::G_FCMP:
3254 case TargetOpcode::G_ICMP:
3259 LLT SrcTy = MRI.getType(
MI.getOperand(2).getReg());
3263 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3264 unsigned ExtOpcode =
3268 ? TargetOpcode::G_SEXT
3269 : TargetOpcode::G_ZEXT;
3276 case TargetOpcode::G_PTR_ADD:
3277 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3283 case TargetOpcode::G_PHI: {
3284 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3287 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3299 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3302 LLT VecTy = MRI.getType(VecReg);
3306 TargetOpcode::G_ANYEXT);
3320 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3336 LLT VecTy = MRI.getType(VecReg);
3355 case TargetOpcode::G_FADD:
3356 case TargetOpcode::G_FMUL:
3357 case TargetOpcode::G_FSUB:
3358 case TargetOpcode::G_FMA:
3359 case TargetOpcode::G_FMAD:
3360 case TargetOpcode::G_FNEG:
3361 case TargetOpcode::G_FABS:
3362 case TargetOpcode::G_FCANONICALIZE:
3363 case TargetOpcode::G_FMINNUM:
3364 case TargetOpcode::G_FMAXNUM:
3365 case TargetOpcode::G_FMINNUM_IEEE:
3366 case TargetOpcode::G_FMAXNUM_IEEE:
3367 case TargetOpcode::G_FMINIMUM:
3368 case TargetOpcode::G_FMAXIMUM:
3369 case TargetOpcode::G_FMINIMUMNUM:
3370 case TargetOpcode::G_FMAXIMUMNUM:
3371 case TargetOpcode::G_FDIV:
3372 case TargetOpcode::G_FREM:
3373 case TargetOpcode::G_FCEIL:
3374 case TargetOpcode::G_FFLOOR:
3375 case TargetOpcode::G_FCOS:
3376 case TargetOpcode::G_FSIN:
3377 case TargetOpcode::G_FTAN:
3378 case TargetOpcode::G_FACOS:
3379 case TargetOpcode::G_FASIN:
3380 case TargetOpcode::G_FATAN:
3381 case TargetOpcode::G_FATAN2:
3382 case TargetOpcode::G_FCOSH:
3383 case TargetOpcode::G_FSINH:
3384 case TargetOpcode::G_FTANH:
3385 case TargetOpcode::G_FLOG10:
3386 case TargetOpcode::G_FLOG:
3387 case TargetOpcode::G_FLOG2:
3388 case TargetOpcode::G_FRINT:
3389 case TargetOpcode::G_FNEARBYINT:
3390 case TargetOpcode::G_FSQRT:
3391 case TargetOpcode::G_FEXP:
3392 case TargetOpcode::G_FEXP2:
3393 case TargetOpcode::G_FEXP10:
3394 case TargetOpcode::G_FPOW:
3395 case TargetOpcode::G_INTRINSIC_TRUNC:
3396 case TargetOpcode::G_INTRINSIC_ROUND:
3397 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3401 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3407 case TargetOpcode::G_FMODF: {
3417 case TargetOpcode::G_FPOWI:
3418 case TargetOpcode::G_FLDEXP:
3419 case TargetOpcode::G_STRICT_FLDEXP: {
3421 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3442 case TargetOpcode::G_FFREXP: {
3455 case TargetOpcode::G_LROUND:
3456 case TargetOpcode::G_LLROUND:
3467 case TargetOpcode::G_INTTOPTR:
3475 case TargetOpcode::G_PTRTOINT:
3483 case TargetOpcode::G_BUILD_VECTOR: {
3487 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3493 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3501 case TargetOpcode::G_SEXT_INREG:
3510 case TargetOpcode::G_PTRMASK: {
3518 case TargetOpcode::G_VECREDUCE_ADD: {
3527 case TargetOpcode::G_VECREDUCE_FADD:
3528 case TargetOpcode::G_VECREDUCE_FMUL:
3529 case TargetOpcode::G_VECREDUCE_FMIN:
3530 case TargetOpcode::G_VECREDUCE_FMAX:
3531 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3532 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3537 LLT VecTy = MRI.getType(VecReg);
3544 case TargetOpcode::G_VSCALE: {
3551 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3556 case TargetOpcode::G_SPLAT_VECTOR: {
3565 case TargetOpcode::G_INSERT_SUBVECTOR: {
3573 LLT SubVecTy = MRI.getType(SubVec);
3577 auto BigZExt =
MIRBuilder.buildZExt(WideTy, BigVec);
3578 auto SubZExt =
MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3579 auto WideInsert =
MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3583 auto SplatZero =
MIRBuilder.buildSplatVector(
3588 MI.eraseFromParent();
3597 auto Unmerge =
B.buildUnmerge(Ty, Src);
3598 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
3607 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3621 MIRBuilder.
buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3630 MI.eraseFromParent();
3641 MI.eraseFromParent();
3648 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3649 if (SrcTy.isVector()) {
3653 if (DstTy.isVector()) {
3654 int NumDstElt = DstTy.getNumElements();
3655 int NumSrcElt = SrcTy.getNumElements();
3658 LLT DstCastTy = DstEltTy;
3659 LLT SrcPartTy = SrcEltTy;
3663 if (NumSrcElt < NumDstElt) {
3674 SrcPartTy = SrcEltTy;
3675 }
else if (NumSrcElt > NumDstElt) {
3687 DstCastTy = DstEltTy;
3692 SrcReg =
MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3696 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3697 MI.eraseFromParent();
3701 if (DstTy.isVector()) {
3704 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3705 MI.eraseFromParent();
3721 unsigned NewEltSize,
3722 unsigned OldEltSize) {
3723 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3724 LLT IdxTy =
B.getMRI()->getType(Idx);
3727 auto OffsetMask =
B.buildConstant(
3729 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
3730 return B.buildShl(IdxTy, OffsetIdx,
3731 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3746 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] =
MI.getFirst3RegLLTs();
3750 unsigned OldNumElts = SrcVecTy.getNumElements();
3757 if (NewNumElts > OldNumElts) {
3768 if (NewNumElts % OldNumElts != 0)
3772 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3776 auto NewEltsPerOldEltK =
MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3779 auto NewBaseIdx =
MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3781 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3782 auto IdxOffset =
MIRBuilder.buildConstant(IdxTy,
I);
3783 auto TmpIdx =
MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3784 auto Elt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3785 NewOps[
I] = Elt.getReg(0);
3788 auto NewVec =
MIRBuilder.buildBuildVector(MidTy, NewOps);
3790 MI.eraseFromParent();
3794 if (NewNumElts < OldNumElts) {
3795 if (NewEltSize % OldEltSize != 0)
3817 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3818 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3821 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3825 WideElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3826 ScaledIdx).getReg(0);
3834 auto ExtractedBits =
MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3836 MI.eraseFromParent();
3850 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3851 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3852 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3853 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3856 auto EltMask =
B.buildConstant(
3860 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3861 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3864 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3868 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3882 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3883 MI.getFirst4RegLLTs();
3895 if (NewNumElts < OldNumElts) {
3896 if (NewEltSize % OldEltSize != 0)
3905 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3906 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3909 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3913 ExtractedElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3914 ScaledIdx).getReg(0);
3924 InsertedElt =
MIRBuilder.buildInsertVectorElement(
3925 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3929 MI.eraseFromParent();
3959 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3963 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3964 return UnableToLegalize;
3969 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3971 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3980 MI.eraseFromParent();
3998 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3999 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4009 auto Inp1 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4010 auto Inp2 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4012 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4013 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4015 MI.eraseFromParent();
4045 LLT DstTy = MRI.getType(Dst);
4046 LLT SrcTy = MRI.getType(Src);
4052 if (DstTy == CastTy)
4060 if (CastEltSize < DstEltSize)
4063 auto AdjustAmt = CastEltSize / DstEltSize;
4064 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4065 SrcTyMinElts % AdjustAmt != 0)
4070 auto CastVec =
MIRBuilder.buildBitcast(SrcTy, Src);
4071 auto PromotedES =
MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4074 ES->eraseFromParent();
4109 LLT DstTy = MRI.getType(Dst);
4110 LLT BigVecTy = MRI.getType(BigVec);
4111 LLT SubVecTy = MRI.getType(SubVec);
4113 if (DstTy == CastTy)
4128 if (CastEltSize < DstEltSize)
4131 auto AdjustAmt = CastEltSize / DstEltSize;
4132 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4133 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4139 auto CastBigVec =
MIRBuilder.buildBitcast(BigVecTy, BigVec);
4140 auto CastSubVec =
MIRBuilder.buildBitcast(SubVecTy, SubVec);
4142 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4145 ES->eraseFromParent();
4153 LLT DstTy = MRI.getType(DstReg);
4163 if (MemSizeInBits != MemStoreSizeInBits) {
4180 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4184 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4185 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4187 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4190 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4192 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4195 if (DstTy != LoadTy)
4203 if (
MIRBuilder.getDataLayout().isBigEndian())
4221 uint64_t LargeSplitSize, SmallSplitSize;
4226 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4233 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4236 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4247 if (Alignment.
value() * 8 > MemSizeInBits &&
4252 auto NewLoad =
MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4269 LLT PtrTy = MRI.getType(PtrReg);
4282 auto LargeLoad =
MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4285 auto OffsetCst =
MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
4286 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4287 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4289 SmallPtr, *SmallMMO);
4291 auto ShiftAmt =
MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4292 auto Shift =
MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4294 if (AnyExtTy == DstTy)
4295 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4297 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4301 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4321 LLT SrcTy = MRI.getType(SrcReg);
4329 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4335 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4337 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4341 auto ZextInReg =
MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4345 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4360 uint64_t LargeSplitSize, SmallSplitSize;
4367 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4370 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4379 if (SrcTy.isPointer()) {
4381 SrcReg =
MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4384 auto ExtVal =
MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4387 auto ShiftAmt =
MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4388 auto SmallVal =
MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4391 LLT PtrTy = MRI.getType(PtrReg);
4394 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4400 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4401 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4410 LLT SrcTy = MRI.getType(SrcReg);
4416 assert(SrcTy.isVector() &&
"Expect a vector store type");
4423 auto CurrVal =
MIRBuilder.buildConstant(IntTy, 0);
4427 auto Elt =
MIRBuilder.buildExtractVectorElement(
4428 SrcTy.getElementType(), SrcReg,
MIRBuilder.buildConstant(IdxTy,
I));
4429 auto Trunc =
MIRBuilder.buildTrunc(MemScalarTy, Elt);
4430 auto ZExt =
MIRBuilder.buildZExt(IntTy, Trunc);
4436 auto Shifted =
MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4437 CurrVal =
MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4441 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4452 switch (
MI.getOpcode()) {
4453 case TargetOpcode::G_LOAD: {
4471 case TargetOpcode::G_STORE: {
4487 case TargetOpcode::G_SELECT: {
4491 if (MRI.getType(
MI.getOperand(1).getReg()).isVector()) {
4493 dbgs() <<
"bitcast action not implemented for vector select\n");
4504 case TargetOpcode::G_AND:
4505 case TargetOpcode::G_OR:
4506 case TargetOpcode::G_XOR: {
4514 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4516 case TargetOpcode::G_INSERT_VECTOR_ELT:
4518 case TargetOpcode::G_CONCAT_VECTORS:
4520 case TargetOpcode::G_SHUFFLE_VECTOR:
4522 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4524 case TargetOpcode::G_INSERT_SUBVECTOR:
4532void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4542 switch(
MI.getOpcode()) {
4545 case TargetOpcode::G_FCONSTANT:
4547 case TargetOpcode::G_BITCAST:
4549 case TargetOpcode::G_SREM:
4550 case TargetOpcode::G_UREM: {
4551 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4553 MIRBuilder.buildInstr(
MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4554 {MI.getOperand(1), MI.getOperand(2)});
4556 auto Prod =
MIRBuilder.buildMul(Ty, Quot,
MI.getOperand(2));
4558 MI.eraseFromParent();
4561 case TargetOpcode::G_SADDO:
4562 case TargetOpcode::G_SSUBO:
4564 case TargetOpcode::G_SADDE:
4566 case TargetOpcode::G_SSUBE:
4568 case TargetOpcode::G_UMULH:
4569 case TargetOpcode::G_SMULH:
4571 case TargetOpcode::G_SMULO:
4572 case TargetOpcode::G_UMULO: {
4575 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4576 LLT Ty = MRI.getType(Res);
4578 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4579 ? TargetOpcode::G_SMULH
4580 : TargetOpcode::G_UMULH;
4584 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4585 MI.removeOperand(1);
4588 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4596 if (Opcode == TargetOpcode::G_SMULH) {
4597 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4598 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4605 case TargetOpcode::G_FNEG: {
4606 auto [Res, ResTy, SubByReg, SubByRegTy] =
MI.getFirst2RegLLTs();
4609 Register CastedSubByReg = SubByReg;
4611 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4612 !SubByRegTy.getScalarType().isInteger()) {
4613 auto BitcastDst = SubByRegTy.changeElementType(
4615 CastedSubByReg =
MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4621 if (ResTy != TyInt) {
4623 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4626 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4628 MI.eraseFromParent();
4631 case TargetOpcode::G_FSUB:
4632 case TargetOpcode::G_STRICT_FSUB: {
4633 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4634 LLT Ty = MRI.getType(Res);
4639 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4640 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4644 MI.eraseFromParent();
4647 case TargetOpcode::G_FMAD:
4649 case TargetOpcode::G_FFLOOR:
4651 case TargetOpcode::G_LROUND:
4652 case TargetOpcode::G_LLROUND: {
4655 LLT SrcTy = MRI.getType(SrcReg);
4656 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4659 MI.eraseFromParent();
4662 case TargetOpcode::G_INTRINSIC_ROUND:
4664 case TargetOpcode::G_FRINT: {
4667 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4670 case TargetOpcode::G_INTRINSIC_LRINT:
4671 case TargetOpcode::G_INTRINSIC_LLRINT: {
4674 LLT SrcTy = MRI.getType(SrcReg);
4676 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4678 MI.eraseFromParent();
4681 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4682 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4683 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4684 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4685 **
MI.memoperands_begin());
4687 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4688 MI.eraseFromParent();
4691 case TargetOpcode::G_LOAD:
4692 case TargetOpcode::G_SEXTLOAD:
4693 case TargetOpcode::G_ZEXTLOAD:
4695 case TargetOpcode::G_STORE:
4697 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4698 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4699 case TargetOpcode::G_CTLZ:
4700 case TargetOpcode::G_CTTZ:
4701 case TargetOpcode::G_CTPOP:
4702 case TargetOpcode::G_CTLS:
4705 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4707 Register NewRes = MRI.cloneVirtualRegister(Res);
4714 MI.eraseFromParent();
4718 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4719 const LLT CondTy = MRI.getType(CarryOut);
4720 const LLT Ty = MRI.getType(Res);
4722 Register NewRes = MRI.cloneVirtualRegister(Res);
4725 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4731 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4732 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4739 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4744 MI.eraseFromParent();
4748 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4753 MI.eraseFromParent();
4757 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4758 const LLT CondTy = MRI.getType(BorrowOut);
4759 const LLT Ty = MRI.getType(Res);
4762 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4768 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4769 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4776 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4777 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4779 MI.eraseFromParent();
4817 case G_MERGE_VALUES:
4819 case G_UNMERGE_VALUES:
4821 case TargetOpcode::G_SEXT_INREG: {
4822 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4823 int64_t SizeInBits =
MI.getOperand(2).getImm();
4825 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4826 LLT DstTy = MRI.getType(DstReg);
4827 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4830 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4831 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4832 MI.eraseFromParent();
4835 case G_EXTRACT_VECTOR_ELT:
4836 case G_INSERT_VECTOR_ELT:
4838 case G_SHUFFLE_VECTOR:
4840 case G_VECTOR_COMPRESS:
4842 case G_DYN_STACKALLOC:
4846 case G_STACKRESTORE:
4856 case G_READ_REGISTER:
4857 case G_WRITE_REGISTER:
4864 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4865 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4871 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4876 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4887 bool IsSigned =
MI.getOpcode() == G_ABDS;
4888 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4889 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4890 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4914 case G_MEMCPY_INLINE:
4915 return lowerMemcpyInline(
MI);
4926 case G_ATOMICRMW_SUB: {
4927 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
4928 const LLT ValTy = MRI.getType(Val);
4932 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4933 MI.eraseFromParent();
4956 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4960 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4966 Align StackTypeAlign =
4973 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4974 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4979 LLT IdxTy =
B.getMRI()->getType(IdxReg);
4991 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
4994 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
5005 "Converting bits to bytes lost precision");
5011 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5012 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
5014 if (IdxTy != MRI.getType(Index))
5015 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5020 LLT PtrTy = MRI.getType(VecPtr);
5021 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
5029 std::initializer_list<unsigned> NonVecOpIndices) {
5030 if (
MI.getNumMemOperands() != 0)
5047 if (!Ty.isVector()) {
5053 if (Ty.getNumElements() != NumElts)
5068 assert(Ty.isVector() &&
"Expected vector type");
5070 int NumParts, NumLeftover;
5071 std::tie(NumParts, NumLeftover) =
5074 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5075 for (
int i = 0; i < NumParts; ++i) {
5080 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5089 for (
unsigned i = 0; i <
N; ++i) {
5091 Ops.push_back(
Op.getReg());
5092 else if (
Op.isImm())
5093 Ops.push_back(
Op.getImm());
5094 else if (
Op.isPredicate())
5116 std::initializer_list<unsigned> NonVecOpIndices) {
5118 "Non-compatible opcode or not specified non-vector operands");
5119 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5121 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5122 unsigned NumDefs =
MI.getNumDefs();
5130 for (
unsigned i = 0; i < NumDefs; ++i) {
5131 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5139 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5140 ++UseIdx, ++UseNo) {
5143 MI.getOperand(UseIdx));
5152 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5156 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5158 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5159 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5162 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5163 Uses.push_back(InputOpsPieces[InputNo][i]);
5166 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5167 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5172 for (
unsigned i = 0; i < NumDefs; ++i)
5173 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5175 for (
unsigned i = 0; i < NumDefs; ++i)
5176 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5179 MI.eraseFromParent();
5186 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5188 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5189 unsigned NumDefs =
MI.getNumDefs();
5193 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5198 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5199 UseIdx += 2, ++UseNo) {
5207 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5209 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5210 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5212 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5215 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5216 Phi.addUse(InputOpsPieces[j][i]);
5217 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5227 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5229 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5232 MI.eraseFromParent();
5240 const int NumDst =
MI.getNumOperands() - 1;
5241 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5242 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5243 LLT SrcTy = MRI.getType(SrcReg);
5245 if (TypeIdx != 1 || NarrowTy == DstTy)
5252 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5255 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5269 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5270 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5271 const int PartsPerUnmerge = NumDst / NumUnmerge;
5273 for (
int I = 0;
I != NumUnmerge; ++
I) {
5274 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5276 for (
int J = 0; J != PartsPerUnmerge; ++J)
5277 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5278 MIB.addUse(Unmerge.getReg(
I));
5281 MI.eraseFromParent();
5288 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5292 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5294 if (NarrowTy == SrcTy)
5302 assert(SrcTy.isVector() &&
"Expected vector types");
5304 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5318 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5319 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5320 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5326 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5327 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5328 ++i,
Offset += NumNarrowTyElts) {
5331 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5334 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5335 MI.eraseFromParent();
5339 assert(TypeIdx == 0 &&
"Bad type index");
5340 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5355 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5356 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5358 for (
unsigned i = 0; i < NumParts; ++i) {
5360 for (
unsigned j = 0; j < NumElts; ++j)
5361 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5363 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5366 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5367 MI.eraseFromParent();
5375 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5377 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5379 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5381 InsertVal =
MI.getOperand(2).getReg();
5383 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5384 LLT VecTy = MRI.getType(SrcVec);
5390 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5394 MI.eraseFromParent();
5403 SplitPieces[IdxVal] = InsertVal;
5404 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5406 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5410 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5413 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5414 TargetOpcode::G_ANYEXT);
5418 LLT IdxTy = MRI.getType(Idx);
5419 int64_t PartIdx = IdxVal / NewNumElts;
5421 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5424 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5427 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5428 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5429 VecParts[PartIdx] = InsertPart.getReg(0);
5433 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5435 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5439 MI.eraseFromParent();
5459 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5471 LLT ValTy = MRI.getType(ValReg);
5480 int NumLeftover = -1;
5486 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5488 NumParts = NarrowRegs.
size();
5489 NumLeftover = NarrowLeftoverRegs.
size();
5496 LLT PtrTy = MRI.getType(AddrReg);
5506 auto MMO = LdStMI.
getMMO();
5508 unsigned NumParts,
unsigned Offset) ->
unsigned {
5511 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5513 unsigned ByteOffset =
Offset / 8;
5516 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5523 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5524 ValRegs.push_back(Dst);
5525 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5527 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5536 unsigned HandledOffset =
5537 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5541 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5544 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5545 LeftoverTy, NarrowLeftoverRegs);
5559 switch (
MI.getOpcode()) {
5560 case G_IMPLICIT_DEF:
5576 case G_FCANONICALIZE:
5593 case G_INTRINSIC_LRINT:
5594 case G_INTRINSIC_LLRINT:
5595 case G_INTRINSIC_ROUND:
5596 case G_INTRINSIC_ROUNDEVEN:
5599 case G_INTRINSIC_TRUNC:
5627 case G_FMINNUM_IEEE:
5628 case G_FMAXNUM_IEEE:
5650 case G_CTLZ_ZERO_UNDEF:
5652 case G_CTTZ_ZERO_UNDEF:
5669 case G_ADDRSPACE_CAST:
5682 case G_STRICT_FLDEXP:
5684 case G_TRUNC_SSAT_S:
5685 case G_TRUNC_SSAT_U:
5686 case G_TRUNC_USAT_U:
5694 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5699 case G_UNMERGE_VALUES:
5701 case G_BUILD_VECTOR:
5702 assert(TypeIdx == 0 &&
"not a vector type index");
5704 case G_CONCAT_VECTORS:
5708 case G_EXTRACT_VECTOR_ELT:
5709 case G_INSERT_VECTOR_ELT:
5718 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5719 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5721 case G_SHUFFLE_VECTOR:
5727 case G_INTRINSIC_FPTRUNC_ROUND:
5737 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5738 "Not a bitcast operation");
5743 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5745 unsigned NewElemCount =
5748 if (NewElemCount == 1) {
5751 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5758 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5767 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5768 MI.eraseFromParent();
5774 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5778 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5779 MI.getFirst3RegLLTs();
5782 if (DstTy != Src1Ty)
5784 if (DstTy != Src2Ty)
5799 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5815 unsigned InputUsed[2] = {-1U, -1U};
5816 unsigned FirstMaskIdx =
High * NewElts;
5817 bool UseBuildVector =
false;
5818 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5820 int Idx = Mask[FirstMaskIdx + MaskOffset];
5825 if (
Input >= std::size(Inputs)) {
5832 Idx -=
Input * NewElts;
5836 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5837 if (InputUsed[OpNo] ==
Input) {
5840 }
else if (InputUsed[OpNo] == -1U) {
5842 InputUsed[OpNo] =
Input;
5847 if (OpNo >= std::size(InputUsed)) {
5850 UseBuildVector =
true;
5855 Ops.push_back(Idx + OpNo * NewElts);
5858 if (UseBuildVector) {
5863 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5865 int Idx = Mask[FirstMaskIdx + MaskOffset];
5870 if (
Input >= std::size(Inputs)) {
5877 Idx -=
Input * NewElts;
5881 .buildExtractVectorElement(
5882 EltTy, Inputs[
Input],
5888 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5889 }
else if (InputUsed[0] == -1U) {
5891 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5892 }
else if (NewElts == 1) {
5893 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5895 Register Op0 = Inputs[InputUsed[0]];
5899 : Inputs[InputUsed[1]];
5901 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5908 MI.eraseFromParent();
5921 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5927 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5930 const unsigned NumParts =
5932 : SrcTy.getNumElements();
5936 if (DstTy != NarrowTy)
5942 unsigned NumPartsLeft = NumParts;
5943 while (NumPartsLeft > 1) {
5944 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5947 .buildInstr(ScalarOpc, {NarrowTy},
5948 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5951 SplitSrcs = PartialResults;
5952 PartialResults.
clear();
5953 NumPartsLeft = SplitSrcs.
size();
5957 MI.eraseFromParent();
5962 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
5963 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5966 MI.eraseFromParent();
5970 for (
unsigned Part = 0; Part < NumParts; ++Part) {
5972 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5980 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5983 Register Acc = PartialReductions[0];
5984 for (
unsigned Part = 1; Part < NumParts; ++Part) {
5985 if (Part == NumParts - 1) {
5987 {Acc, PartialReductions[Part]});
5990 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5994 MI.eraseFromParent();
6000 unsigned int TypeIdx,
6002 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6003 MI.getFirst3RegLLTs();
6004 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6008 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6009 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6010 "Unexpected vecreduce opcode");
6011 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6012 ? TargetOpcode::G_FADD
6013 : TargetOpcode::G_FMUL;
6016 unsigned NumParts = SrcTy.getNumElements();
6019 for (
unsigned i = 0; i < NumParts; i++)
6020 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6024 MI.eraseFromParent();
6031 unsigned ScalarOpc) {
6039 while (SplitSrcs.
size() > 1) {
6041 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6049 SplitSrcs = std::move(PartialRdxs);
6053 MI.getOperand(1).setReg(SplitSrcs[0]);
6060 const LLT HalfTy,
const LLT AmtTy) {
6062 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6063 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6067 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6068 MI.eraseFromParent();
6074 unsigned VTBits = 2 * NVTBits;
6077 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6078 if (Amt.
ugt(VTBits)) {
6080 }
else if (Amt.
ugt(NVTBits)) {
6083 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6084 }
else if (Amt == NVTBits) {
6092 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6095 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6096 if (Amt.
ugt(VTBits)) {
6098 }
else if (Amt.
ugt(NVTBits)) {
6100 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6102 }
else if (Amt == NVTBits) {
6106 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6108 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6110 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6116 if (Amt.
ugt(VTBits)) {
6118 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6119 }
else if (Amt.
ugt(NVTBits)) {
6121 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6123 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6124 }
else if (Amt == NVTBits) {
6127 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6129 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6131 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6133 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6140 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6141 MI.eraseFromParent();
6157 LLT DstTy = MRI.getType(DstReg);
6162 LLT ShiftAmtTy = MRI.getType(Amt);
6164 if (DstEltSize % 2 != 0)
6180 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6191 const unsigned NewBitSize = DstEltSize / 2;
6203 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6205 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6206 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6209 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6210 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6212 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6217 switch (
MI.getOpcode()) {
6218 case TargetOpcode::G_SHL: {
6220 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6222 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6223 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6224 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6227 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6228 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6230 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6232 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6234 ResultRegs[0] =
Lo.getReg(0);
6235 ResultRegs[1] =
Hi.getReg(0);
6238 case TargetOpcode::G_LSHR:
6239 case TargetOpcode::G_ASHR: {
6241 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6243 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6244 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6245 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6249 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6252 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6253 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6255 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6259 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6261 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6263 ResultRegs[0] =
Lo.getReg(0);
6264 ResultRegs[1] =
Hi.getReg(0);
6271 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6272 MI.eraseFromParent();
6281 LLT TargetTy,
LLT ShiftAmtTy) {
6284 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6286 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6287 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6288 const bool NeedsInterWordShift = ShiftBits != 0;
6291 case TargetOpcode::G_SHL: {
6294 if (PartIdx < ShiftWords)
6297 unsigned SrcIdx = PartIdx - ShiftWords;
6298 if (!NeedsInterWordShift)
6299 return SrcParts[SrcIdx];
6304 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6308 return Hi.getReg(0);
6311 case TargetOpcode::G_LSHR: {
6312 unsigned SrcIdx = PartIdx + ShiftWords;
6313 if (SrcIdx >= NumParts)
6315 if (!NeedsInterWordShift)
6316 return SrcParts[SrcIdx];
6320 if (SrcIdx + 1 < NumParts) {
6321 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6325 return Lo.getReg(0);
6328 case TargetOpcode::G_ASHR: {
6330 unsigned SrcIdx = PartIdx + ShiftWords;
6331 if (SrcIdx >= NumParts)
6333 if (!NeedsInterWordShift)
6334 return SrcParts[SrcIdx];
6339 (SrcIdx == NumParts - 1)
6343 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6365 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6366 ?
static_cast<unsigned>(TargetOpcode::G_LSHR)
6371 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6380 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6381 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6383 auto IsZeroBitShift =
6391 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6392 : TargetOpcode::G_SHL;
6395 auto TargetBitsConst =
6397 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6402 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6407 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6409 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6413 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6426 LLT DstTy = MRI.getType(DstReg);
6430 const unsigned NumParts = DstBits / TargetBits;
6432 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6442 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6443 MI.eraseFromParent();
6448 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6449 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6455 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6459 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6462 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6463 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6467 for (
unsigned I = 0;
I < NumParts; ++
I)
6469 Params, TargetTy, ShiftAmtTy);
6471 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6472 MI.eraseFromParent();
6481 LLT DstTy = MRI.getType(DstReg);
6482 LLT ShiftAmtTy = MRI.getType(AmtReg);
6486 const unsigned NumParts = DstBits / TargetBits;
6488 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6505 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6517 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6518 auto TargetBitsLog2Const =
6519 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6520 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6523 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6525 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6533 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6534 auto TargetBitsMinusOneConst =
6535 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6537 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6538 TargetBitsMinusOneConst)
6541 FillValue = ZeroReg;
6549 for (
unsigned I = 0;
I < NumParts; ++
I) {
6551 Register InBoundsResult = FillValue;
6561 for (
unsigned K = 0; K < NumParts; ++K) {
6562 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6564 WordShift, WordShiftKConst);
6576 switch (
MI.getOpcode()) {
6577 case TargetOpcode::G_SHL:
6578 MainSrcIdx = (int)
I - (
int)K;
6579 CarrySrcIdx = MainSrcIdx - 1;
6581 case TargetOpcode::G_LSHR:
6582 case TargetOpcode::G_ASHR:
6583 MainSrcIdx = (int)
I + (
int)K;
6584 CarrySrcIdx = MainSrcIdx + 1;
6592 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6593 Register MainOp = SrcParts[MainSrcIdx];
6597 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6598 CarryOp = SrcParts[CarrySrcIdx];
6599 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6600 CarrySrcIdx >= (
int)NumParts)
6601 CarryOp = FillValue;
6607 ResultForK = FillValue;
6613 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6620 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6624 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6625 MI.eraseFromParent();
6632 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6635 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6650 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6655 "getNeutralElementForVecReduce called with invalid opcode!");
6656 case TargetOpcode::G_VECREDUCE_ADD:
6657 case TargetOpcode::G_VECREDUCE_OR:
6658 case TargetOpcode::G_VECREDUCE_XOR:
6659 case TargetOpcode::G_VECREDUCE_UMAX:
6661 case TargetOpcode::G_VECREDUCE_MUL:
6663 case TargetOpcode::G_VECREDUCE_AND:
6664 case TargetOpcode::G_VECREDUCE_UMIN:
6667 case TargetOpcode::G_VECREDUCE_SMAX:
6670 case TargetOpcode::G_VECREDUCE_SMIN:
6673 case TargetOpcode::G_VECREDUCE_FADD:
6675 case TargetOpcode::G_VECREDUCE_FMUL:
6677 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6678 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6679 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6680 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6688 unsigned Opc =
MI.getOpcode();
6690 case TargetOpcode::G_IMPLICIT_DEF:
6691 case TargetOpcode::G_LOAD: {
6699 case TargetOpcode::G_STORE:
6706 case TargetOpcode::G_AND:
6707 case TargetOpcode::G_OR:
6708 case TargetOpcode::G_XOR:
6709 case TargetOpcode::G_ADD:
6710 case TargetOpcode::G_SUB:
6711 case TargetOpcode::G_MUL:
6712 case TargetOpcode::G_FADD:
6713 case TargetOpcode::G_FSUB:
6714 case TargetOpcode::G_FMUL:
6715 case TargetOpcode::G_FDIV:
6716 case TargetOpcode::G_FCOPYSIGN:
6717 case TargetOpcode::G_UADDSAT:
6718 case TargetOpcode::G_USUBSAT:
6719 case TargetOpcode::G_SADDSAT:
6720 case TargetOpcode::G_SSUBSAT:
6721 case TargetOpcode::G_SMIN:
6722 case TargetOpcode::G_SMAX:
6723 case TargetOpcode::G_UMIN:
6724 case TargetOpcode::G_UMAX:
6725 case TargetOpcode::G_FMINNUM:
6726 case TargetOpcode::G_FMAXNUM:
6727 case TargetOpcode::G_FMINNUM_IEEE:
6728 case TargetOpcode::G_FMAXNUM_IEEE:
6729 case TargetOpcode::G_FMINIMUM:
6730 case TargetOpcode::G_FMAXIMUM:
6731 case TargetOpcode::G_FMINIMUMNUM:
6732 case TargetOpcode::G_FMAXIMUMNUM:
6733 case TargetOpcode::G_STRICT_FADD:
6734 case TargetOpcode::G_STRICT_FSUB:
6735 case TargetOpcode::G_STRICT_FMUL: {
6743 case TargetOpcode::G_SHL:
6744 case TargetOpcode::G_ASHR:
6745 case TargetOpcode::G_LSHR: {
6751 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6757 case TargetOpcode::G_FMA:
6758 case TargetOpcode::G_STRICT_FMA:
6759 case TargetOpcode::G_FSHR:
6760 case TargetOpcode::G_FSHL: {
6769 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6770 case TargetOpcode::G_EXTRACT:
6777 case TargetOpcode::G_INSERT:
6778 case TargetOpcode::G_INSERT_VECTOR_ELT:
6779 case TargetOpcode::G_FREEZE:
6780 case TargetOpcode::G_FNEG:
6781 case TargetOpcode::G_FABS:
6782 case TargetOpcode::G_FSQRT:
6783 case TargetOpcode::G_FCEIL:
6784 case TargetOpcode::G_FFLOOR:
6785 case TargetOpcode::G_FNEARBYINT:
6786 case TargetOpcode::G_FRINT:
6787 case TargetOpcode::G_INTRINSIC_ROUND:
6788 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6789 case TargetOpcode::G_INTRINSIC_TRUNC:
6790 case TargetOpcode::G_BITREVERSE:
6791 case TargetOpcode::G_BSWAP:
6792 case TargetOpcode::G_FCANONICALIZE:
6793 case TargetOpcode::G_SEXT_INREG:
6794 case TargetOpcode::G_ABS:
6795 case TargetOpcode::G_CTLZ:
6796 case TargetOpcode::G_CTPOP:
6804 case TargetOpcode::G_SELECT: {
6805 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6807 if (!CondTy.isScalar() ||
6813 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6815 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6820 if (CondTy.isVector())
6830 case TargetOpcode::G_UNMERGE_VALUES:
6832 case TargetOpcode::G_PHI:
6834 case TargetOpcode::G_SHUFFLE_VECTOR:
6836 case TargetOpcode::G_BUILD_VECTOR: {
6838 for (
auto Op :
MI.uses()) {
6846 MIRBuilder.buildDeleteTrailingVectorElements(
6847 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6848 MI.eraseFromParent();
6851 case TargetOpcode::G_SEXT:
6852 case TargetOpcode::G_ZEXT:
6853 case TargetOpcode::G_ANYEXT:
6854 case TargetOpcode::G_TRUNC:
6855 case TargetOpcode::G_FPTRUNC:
6856 case TargetOpcode::G_FPEXT:
6857 case TargetOpcode::G_FPTOSI:
6858 case TargetOpcode::G_FPTOUI:
6859 case TargetOpcode::G_FPTOSI_SAT:
6860 case TargetOpcode::G_FPTOUI_SAT:
6861 case TargetOpcode::G_SITOFP:
6862 case TargetOpcode::G_UITOFP: {
6869 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6872 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6880 case TargetOpcode::G_ICMP:
6881 case TargetOpcode::G_FCMP: {
6889 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6894 case TargetOpcode::G_BITCAST: {
6898 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6899 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
6915 case TargetOpcode::G_VECREDUCE_FADD:
6916 case TargetOpcode::G_VECREDUCE_FMUL:
6917 case TargetOpcode::G_VECREDUCE_ADD:
6918 case TargetOpcode::G_VECREDUCE_MUL:
6919 case TargetOpcode::G_VECREDUCE_AND:
6920 case TargetOpcode::G_VECREDUCE_OR:
6921 case TargetOpcode::G_VECREDUCE_XOR:
6922 case TargetOpcode::G_VECREDUCE_SMAX:
6923 case TargetOpcode::G_VECREDUCE_SMIN:
6924 case TargetOpcode::G_VECREDUCE_UMAX:
6925 case TargetOpcode::G_VECREDUCE_UMIN: {
6926 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
6928 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6929 auto NeutralElement = getNeutralElementForVecReduce(
6935 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
6936 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6937 NeutralElement, Idx);
6941 MO.
setReg(NewVec.getReg(0));
6953 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6955 unsigned MaskNumElts = Mask.size();
6956 unsigned SrcNumElts = SrcTy.getNumElements();
6959 if (MaskNumElts == SrcNumElts)
6962 if (MaskNumElts < SrcNumElts) {
6970 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
6971 MI.getOperand(1).getReg(),
6972 MI.getOperand(2).getReg(), NewMask);
6973 MI.eraseFromParent();
6978 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
6979 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6988 MOps1[0] =
MI.getOperand(1).getReg();
6989 MOps2[0] =
MI.getOperand(2).getReg();
6991 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6992 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6996 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
6998 if (Idx >=
static_cast<int>(SrcNumElts))
6999 Idx += PaddedMaskNumElts - SrcNumElts;
7004 if (MaskNumElts != PaddedMaskNumElts) {
7006 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7009 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
7011 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
7016 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7019 MI.eraseFromParent();
7025 unsigned int TypeIdx,
LLT MoreTy) {
7026 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
7028 unsigned NumElts = DstTy.getNumElements();
7031 if (DstTy.isVector() && Src1Ty.isVector() &&
7032 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7040 if (DstTy != Src1Ty || DstTy != Src2Ty)
7048 for (
unsigned I = 0;
I != NumElts; ++
I) {
7050 if (Idx <
static_cast<int>(NumElts))
7053 NewMask[
I] = Idx - NumElts + WidenNumElts;
7057 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7058 MI.getOperand(1).getReg(),
7059 MI.getOperand(2).getReg(), NewMask);
7060 MI.eraseFromParent();
7069 unsigned SrcParts = Src1Regs.
size();
7070 unsigned DstParts = DstRegs.
size();
7072 unsigned DstIdx = 0;
7074 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7075 DstRegs[DstIdx] = FactorSum;
7080 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7082 for (
unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7083 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7085 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7089 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7090 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7092 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7102 if (DstIdx != DstParts - 1) {
7103 MachineInstrBuilder Uaddo =
7104 B.buildUAddo(NarrowTy,
LLT::scalar(1), Factors[0], Factors[1]);
7105 FactorSum = Uaddo.
getReg(0);
7106 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7107 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7108 MachineInstrBuilder Uaddo =
7109 B.buildUAddo(NarrowTy,
LLT::scalar(1), FactorSum, Factors[i]);
7110 FactorSum = Uaddo.
getReg(0);
7111 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7112 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7116 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7117 for (
unsigned i = 2; i < Factors.
size(); ++i)
7118 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7121 CarrySumPrevDstIdx = CarrySum;
7122 DstRegs[DstIdx] = FactorSum;
7134 LLT DstType = MRI.getType(DstReg);
7136 if (DstType.isVector())
7139 unsigned Opcode =
MI.getOpcode();
7140 unsigned OpO, OpE, OpF;
7142 case TargetOpcode::G_SADDO:
7143 case TargetOpcode::G_SADDE:
7144 case TargetOpcode::G_UADDO:
7145 case TargetOpcode::G_UADDE:
7146 case TargetOpcode::G_ADD:
7147 OpO = TargetOpcode::G_UADDO;
7148 OpE = TargetOpcode::G_UADDE;
7149 OpF = TargetOpcode::G_UADDE;
7150 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7151 OpF = TargetOpcode::G_SADDE;
7153 case TargetOpcode::G_SSUBO:
7154 case TargetOpcode::G_SSUBE:
7155 case TargetOpcode::G_USUBO:
7156 case TargetOpcode::G_USUBE:
7157 case TargetOpcode::G_SUB:
7158 OpO = TargetOpcode::G_USUBO;
7159 OpE = TargetOpcode::G_USUBE;
7160 OpF = TargetOpcode::G_USUBE;
7161 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7162 OpF = TargetOpcode::G_SSUBE;
7169 unsigned NumDefs =
MI.getNumExplicitDefs();
7170 Register Src1 =
MI.getOperand(NumDefs).getReg();
7171 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7174 CarryDst =
MI.getOperand(1).getReg();
7175 if (
MI.getNumOperands() == NumDefs + 3)
7176 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7178 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7179 LLT LeftoverTy, DummyTy;
7181 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7186 int NarrowParts = Src1Regs.
size();
7187 Src1Regs.
append(Src1Left);
7188 Src2Regs.
append(Src2Left);
7191 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7193 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7196 if (i == e - 1 && CarryDst)
7197 CarryOut = CarryDst;
7199 CarryOut = MRI.createGenericVirtualRegister(
LLT::scalar(1));
7202 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7203 {Src1Regs[i], Src2Regs[i]});
7204 }
else if (i == e - 1) {
7205 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7206 {Src1Regs[i], Src2Regs[i], CarryIn});
7208 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7209 {Src1Regs[i], Src2Regs[i], CarryIn});
7215 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7216 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7217 ArrayRef(DstRegs).drop_front(NarrowParts));
7219 MI.eraseFromParent();
7225 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7227 LLT Ty = MRI.getType(DstReg);
7231 unsigned Size = Ty.getSizeInBits();
7233 if (
Size % NarrowSize != 0)
7236 unsigned NumParts =
Size / NarrowSize;
7237 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7238 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7244 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7248 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7249 MI.eraseFromParent();
7259 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7262 LLT SrcTy = MRI.getType(Src);
7273 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7286 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7289 if (SizeOp1 % NarrowSize != 0)
7291 int NumParts = SizeOp1 / NarrowSize;
7294 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7298 uint64_t OpStart =
MI.getOperand(2).getImm();
7299 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7300 for (
int i = 0; i < NumParts; ++i) {
7301 unsigned SrcStart = i * NarrowSize;
7303 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7306 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7314 int64_t ExtractOffset;
7316 if (OpStart < SrcStart) {
7318 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7320 ExtractOffset = OpStart - SrcStart;
7321 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7325 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7327 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7328 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7335 if (MRI.getType(DstReg).isVector())
7336 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7337 else if (DstRegs.
size() > 1)
7338 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7341 MI.eraseFromParent();
7353 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7355 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7358 SrcRegs.
append(LeftoverRegs);
7362 uint64_t OpStart =
MI.getOperand(3).getImm();
7363 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7364 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7365 unsigned DstStart =
I * NarrowSize;
7367 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7375 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7377 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7381 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7389 int64_t ExtractOffset, InsertOffset;
7391 if (OpStart < DstStart) {
7393 ExtractOffset = DstStart - OpStart;
7394 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7396 InsertOffset = OpStart - DstStart;
7399 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7403 if (ExtractOffset != 0 || SegSize != OpSize) {
7405 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7406 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7409 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7410 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7418 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7421 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7423 MI.eraseFromParent();
7431 LLT DstTy = MRI.getType(DstReg);
7433 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7439 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7440 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7444 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7445 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7448 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7449 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7450 {Src0Regs[I], Src1Regs[I]});
7454 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7457 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7458 DstLeftoverRegs.
push_back(Inst.getReg(0));
7461 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7462 LeftoverTy, DstLeftoverRegs);
7464 MI.eraseFromParent();
7474 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7476 LLT DstTy = MRI.getType(DstReg);
7481 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7482 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7483 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7485 MI.eraseFromParent();
7495 Register CondReg =
MI.getOperand(1).getReg();
7496 LLT CondTy = MRI.getType(CondReg);
7497 if (CondTy.isVector())
7501 LLT DstTy = MRI.getType(DstReg);
7507 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7508 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7512 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7513 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7516 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7518 CondReg, Src1Regs[
I], Src2Regs[
I]);
7522 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7524 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7528 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7529 LeftoverTy, DstLeftoverRegs);
7531 MI.eraseFromParent();
7541 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7544 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7545 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7548 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7550 auto C_0 =
B.buildConstant(NarrowTy, 0);
7552 UnmergeSrc.getReg(1), C_0);
7553 auto LoCTLZ = IsUndef ?
7554 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7555 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7556 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7557 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7558 auto HiCTLZ =
B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7559 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7561 MI.eraseFromParent();
7574 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7577 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7578 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7581 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7583 auto C_0 =
B.buildConstant(NarrowTy, 0);
7585 UnmergeSrc.getReg(0), C_0);
7586 auto HiCTTZ = IsUndef ?
7587 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7588 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7589 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7590 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7591 auto LoCTTZ =
B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7592 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7594 MI.eraseFromParent();
7607 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7610 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7615 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7619 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7620 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7628 auto LoInv =
B.buildXor(DstTy,
Lo, Sign);
7629 auto LoCTLZ =
B.buildCTLZ(DstTy, LoInv);
7632 auto C_NarrowSizeM1 =
B.buildConstant(DstTy, NarrowSize - 1);
7633 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7635 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7637 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7639 MI.eraseFromParent();
7649 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7652 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7653 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7655 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7656 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7657 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7659 MI.eraseFromParent();
7674 LLT ExpTy = MRI.getType(ExpReg);
7679 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7680 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7681 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7682 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7684 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7686 MI.getOperand(2).setReg(Trunc.getReg(0));
7693 unsigned Opc =
MI.getOpcode();
7696 auto QAction = LI.getAction(Q).Action;
7702 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7705 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7709 case TargetOpcode::G_CTLZ: {
7710 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7711 unsigned Len = SrcTy.getScalarSizeInBits();
7713 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7715 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7716 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7719 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7720 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7721 MI.eraseFromParent();
7737 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7738 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7741 Op = MIBOp.getReg(0);
7746 MI.eraseFromParent();
7749 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7752 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7756 case TargetOpcode::G_CTTZ: {
7757 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7759 unsigned Len = SrcTy.getScalarSizeInBits();
7760 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7763 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7764 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7767 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7768 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7769 MI.eraseFromParent();
7776 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7777 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7779 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7780 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7781 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7782 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7785 MI.eraseFromParent();
7789 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7790 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7794 case TargetOpcode::G_CTPOP: {
7796 LLT Ty = MRI.getType(SrcReg);
7797 unsigned Size = Ty.getScalarSizeInBits();
7809 auto C_1 =
B.buildConstant(Ty, 1);
7810 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7812 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7813 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7814 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7818 auto C_2 =
B.buildConstant(Ty, 2);
7819 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7821 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7822 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7823 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7824 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7831 auto C_4 =
B.buildConstant(Ty, 4);
7832 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7833 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7835 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7836 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7838 assert(
Size <= 128 &&
"Scalar size is too large for CTPOP lower algorithm");
7841 if (
Size == 16 && !Ty.isVector()) {
7843 auto C_8 =
B.buildConstant(Ty, 8);
7844 auto HighSum =
B.buildLShr(Ty, B8Count, C_8);
7845 auto Res =
B.buildAdd(Ty, B8Count, HighSum);
7846 B.buildAnd(
MI.getOperand(0).getReg(), Res,
B.buildConstant(Ty, 0xFF));
7847 MI.eraseFromParent();
7856 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7858 auto IsMulSupported = [
this](
const LLT Ty) {
7859 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7862 if (IsMulSupported(Ty)) {
7863 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7864 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7866 auto ResTmp = B8Count;
7867 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7868 auto ShiftC =
B.buildConstant(Ty, Shift);
7869 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7870 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7872 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7874 MI.eraseFromParent();
7877 case TargetOpcode::G_CTLS: {
7878 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7882 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7883 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7885 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7891 MI.eraseFromParent();
7912 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7913 LLT Ty = MRI.getType(Dst);
7914 LLT ShTy = MRI.getType(Z);
7921 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7922 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7927 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
7928 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7932 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7945 MI.eraseFromParent();
7951 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7952 LLT Ty = MRI.getType(Dst);
7953 LLT ShTy = MRI.getType(Z);
7956 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7966 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
7967 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7968 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7969 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7970 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7974 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
7977 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7980 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7982 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
7983 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7984 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7987 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7989 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
7991 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7994 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7995 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
8000 MI.eraseFromParent();
8011 LLT Ty = MRI.getType(Dst);
8012 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
8014 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8015 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8018 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
8019 return lowerFunnelShiftAsShifts(
MI);
8023 if (Result == UnableToLegalize)
8024 return lowerFunnelShiftAsShifts(
MI);
8029 auto [Dst, Src] =
MI.getFirst2Regs();
8030 LLT DstTy = MRI.getType(Dst);
8031 LLT SrcTy = MRI.getType(Src);
8035 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8043 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8047 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8051 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8056 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8057 {UnmergeSrc.getReg(0)});
8058 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8059 {UnmergeSrc.getReg(1)});
8062 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8064 MI.eraseFromParent();
8081 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8085 LLT DstTy = MRI.getType(DstReg);
8086 LLT SrcTy = MRI.getType(SrcReg);
8094 SrcTy.getElementCount().divideCoefficientBy(2));
8107 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8119 MI.eraseFromParent();
8128 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8129 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8130 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8131 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8132 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8133 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8134 MI.eraseFromParent();
8139 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8141 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8142 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8147 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8148 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8150 return lowerRotateWithReverseRotate(
MI);
8153 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8154 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8155 bool IsFShLegal =
false;
8156 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8157 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8161 MI.eraseFromParent();
8166 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8169 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8174 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8175 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8176 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8182 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8183 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8185 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8191 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8192 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8194 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8196 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8201 MI.eraseFromParent();
8209 auto [Dst, Src] =
MI.getFirst2Regs();
8214 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8242 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8255 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8259 MI.eraseFromParent();
8267 auto [Dst, Src] =
MI.getFirst2Regs();
8272 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8285 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8287 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8292 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8294 MI.eraseFromParent();
8302 auto [Dst, Src] =
MI.getFirst2Regs();
8306 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S64);
8317 auto TwoP52 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4330000000000000));
8318 auto TwoP84 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4530000000000000));
8320 auto TwoP52P84FP =
MIRBuilder.buildFConstant(
S64, TwoP52P84);
8327 auto HighBitsFP =
MIRBuilder.buildOr(
S64, TwoP84, HighBits);
8328 auto Scratch =
MIRBuilder.buildFSub(
S64, HighBitsFP, TwoP52P84FP);
8329 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8331 MI.eraseFromParent();
8342 SrcTy.changeElementType(
LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
8343 auto M1 =
MI.getOpcode() == TargetOpcode::G_UITOFP
8349 MI.eraseFromParent();
8354 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8357 auto True =
MIRBuilder.buildFConstant(DstTy, 1.0);
8358 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8359 MIRBuilder.buildSelect(Dst, Src, True, False);
8360 MI.eraseFromParent();
8364 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8384 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8391 auto True =
MIRBuilder.buildFConstant(DstTy, -1.0);
8392 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8393 MIRBuilder.buildSelect(Dst, Src, True, False);
8394 MI.eraseFromParent();
8398 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8404 if (DstTy.getScalarSizeInBits() == 32) {
8411 auto SignBit =
MIRBuilder.buildConstant(I64, 63);
8412 auto S =
MIRBuilder.buildAShr(I64, L, SignBit);
8414 auto LPlusS =
MIRBuilder.buildAdd(I64, L, S);
8421 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8422 MI.eraseFromParent();
8430 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8434 if (SrcTy !=
S64 && SrcTy !=
S32)
8436 if (DstTy !=
S32 && DstTy !=
S64)
8463 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8465 MI.eraseFromParent();
8470 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8475 if (SrcTy.getScalarType() !=
S32 || DstTy.getScalarType() !=
S64)
8482 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8484 auto ExponentMask =
MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8485 auto ExponentLoBit =
MIRBuilder.buildConstant(SrcTy, 23);
8487 auto AndExpMask =
MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8488 auto ExponentBits =
MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8490 auto SignMask =
MIRBuilder.buildConstant(SrcTy,
8492 auto AndSignMask =
MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8493 auto SignLowBit =
MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8494 auto Sign =
MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8497 auto MantissaMask =
MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8498 auto AndMantissaMask =
MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8499 auto K =
MIRBuilder.buildConstant(SrcTy, 0x00800000);
8501 auto R =
MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8504 auto Bias =
MIRBuilder.buildConstant(SrcTy, 127);
8509 auto Shl =
MIRBuilder.buildShl(DstTy, R, SubExponent);
8510 auto Srl =
MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8516 R =
MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8518 auto XorSign =
MIRBuilder.buildXor(DstTy, R, Sign);
8519 auto Ret =
MIRBuilder.buildSub(DstTy, XorSign, Sign);
8521 auto ZeroSrcTy =
MIRBuilder.buildConstant(SrcTy, 0);
8526 auto ZeroDstTy =
MIRBuilder.buildConstant(DstTy, 0);
8527 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8529 MI.eraseFromParent();
8535 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8537 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8538 unsigned SatWidth = DstTy.getScalarSizeInBits();
8542 APInt MinInt, MaxInt;
8565 if (AreExactFloatBounds) {
8567 auto MaxC =
MIRBuilder.buildFConstant(SrcTy, MinFloat);
8570 auto Max =
MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8572 auto MinC =
MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8581 MI.eraseFromParent();
8586 auto FpToInt =
MIRBuilder.buildFPTOSI(DstTy, Min);
8591 MI.eraseFromParent();
8598 auto FpToInt = IsSigned ?
MIRBuilder.buildFPTOSI(DstTy, Src)
8606 DstTy, ULT,
MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8616 MI.eraseFromParent();
8622 DstTy, OGT,
MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8626 MI.eraseFromParent();
8636 auto [Dst, Src] =
MI.getFirst2Regs();
8638 MRI.getType(Src).getScalarType() ==
LLT::scalar(64));
8640 if (MRI.getType(Src).isVector())
8644 unsigned Flags =
MI.getFlags();
8647 MI.eraseFromParent();
8651 const unsigned ExpMask = 0x7ff;
8652 const unsigned ExpBiasf64 = 1023;
8653 const unsigned ExpBiasf16 = 15;
8682 auto SelectCC =
MIRBuilder.buildSelect(
S32, CmpM_NE0, Bits0x200, Zero);
8742 MI.eraseFromParent();
8748 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8752 if (DstTy.getScalarType() ==
S16 && SrcTy.getScalarType() ==
S64)
8759 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8760 LLT Ty = MRI.getType(Dst);
8762 auto CvtSrc1 =
MIRBuilder.buildSITOFP(Ty, Src1);
8763 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1,
MI.getFlags());
8764 MI.eraseFromParent();
8769 auto [DstFrac, DstInt, Src] =
MI.getFirst3Regs();
8770 LLT Ty = MRI.getType(Src);
8771 auto Flags =
MI.getFlags();
8778 FracToUse = FracPart.getReg(0);
8780 auto Abs =
MIRBuilder.buildFAbs(Ty, Src, Flags);
8784 Ty.changeElementSize(1), Abs, Inf);
8785 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
8787 FracToUse =
Select.getReg(0);
8790 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8793 MI.eraseFromParent();
8799 case TargetOpcode::G_SMIN:
8801 case TargetOpcode::G_SMAX:
8803 case TargetOpcode::G_UMIN:
8805 case TargetOpcode::G_UMAX:
8813 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8818 auto Cmp =
MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8819 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8821 MI.eraseFromParent();
8830 LLT DstTy = MRI.getType(Dst);
8831 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8841 auto Zero =
MIRBuilder.buildConstant(DstTy, 0);
8842 auto IsGT =
MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8844 auto IsLT =
MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8847 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
8848 auto BC = TLI.getBooleanContents(DstTy.
isVector(),
false);
8849 if (TLI.preferSelectsOverBooleanArithmetic(
8852 auto One =
MIRBuilder.buildConstant(DstTy, 1);
8853 auto SelectZeroOrOne =
MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8855 auto MinusOne =
MIRBuilder.buildConstant(DstTy, -1);
8856 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8862 unsigned BoolExtOp =
8864 IsGT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8865 IsLT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8869 MI.eraseFromParent();
8875 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
8876 const int Src0Size = Src0Ty.getScalarSizeInBits();
8877 const int Src1Size = Src1Ty.getScalarSizeInBits();
8887 if (!(Src0Ty.getScalarType().isAnyScalar() ||
8888 Src0Ty.getScalarType().isInteger()))
8889 Src0Int =
MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);
8891 if (!(Src1Ty.getScalarType().isAnyScalar() ||
8892 Src1Ty.getScalarType().isInteger()))
8893 Src1Int =
MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);
8898 auto NotSignBitMask =
MIRBuilder.buildConstant(
8902 MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
8904 if (Src0Ty == Src1Ty) {
8905 And1 =
MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
8906 }
else if (Src0Size > Src1Size) {
8907 auto ShiftAmt =
MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
8908 auto Zext =
MIRBuilder.buildZExt(Src0IntTy, Src1Int);
8909 auto Shift =
MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
8910 And1 =
MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8912 auto ShiftAmt =
MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
8913 auto Shift =
MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
8914 auto Trunc =
MIRBuilder.buildTrunc(Src0IntTy, Shift);
8915 And1 =
MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
8921 unsigned Flags =
MI.getFlags();
8926 if (DstTy == DstIntTy)
8927 MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
8933 MI.eraseFromParent();
8944 switch (
MI.getOpcode()) {
8945 case TargetOpcode::G_FMINNUM:
8946 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8948 case TargetOpcode::G_FMINIMUMNUM:
8949 NewOp = TargetOpcode::G_FMINNUM;
8951 case TargetOpcode::G_FMAXNUM:
8952 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8954 case TargetOpcode::G_FMAXIMUMNUM:
8955 NewOp = TargetOpcode::G_FMAXNUM;
8961 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8962 LLT Ty = MRI.getType(Dst);
8972 Src0 =
MIRBuilder.buildFCanonicalize(Ty, Src0,
MI.getFlags()).getReg(0);
8975 Src1 =
MIRBuilder.buildFCanonicalize(Ty, Src1,
MI.getFlags()).getReg(0);
8980 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1},
MI.getFlags());
8981 MI.eraseFromParent();
8987 unsigned Opc =
MI.getOpcode();
8988 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8989 LLT Ty = MRI.getType(Dst);
8992 bool IsMax = (
Opc == TargetOpcode::G_FMAXIMUM);
8994 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8995 unsigned OpcNonIeee =
8996 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8997 bool MinMaxMustRespectOrderedZero =
false;
9001 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
9003 MinMaxMustRespectOrderedZero =
true;
9004 }
else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
9009 Res =
MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
9017 LLT ElementTy = Ty.
isScalar() ? Ty : Ty.getElementType();
9021 NaN =
MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
9023 Res =
MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
9033 const unsigned Flags =
MI.getFlags();
9039 auto LHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
9041 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
9043 auto RHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
9045 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9047 Res =
MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9052 MI.eraseFromParent();
9059 LLT Ty = MRI.getType(DstReg);
9060 unsigned Flags =
MI.getFlags();
9065 MI.eraseFromParent();
9071 auto [DstReg,
X] =
MI.getFirst2Regs();
9072 const unsigned Flags =
MI.getFlags();
9073 const LLT Ty = MRI.getType(DstReg);
9085 auto AbsDiff =
MIRBuilder.buildFAbs(Ty, Diff, Flags);
9087 auto Half =
MIRBuilder.buildFConstant(Ty, 0.5);
9092 auto One =
MIRBuilder.buildFConstant(Ty, 1.0);
9093 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9094 auto BoolFP =
MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9095 auto SignedOffset =
MIRBuilder.buildFCopysign(Ty, BoolFP,
X);
9097 MIRBuilder.buildFAdd(DstReg,
T, SignedOffset, Flags);
9099 MI.eraseFromParent();
9104 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
9105 unsigned Flags =
MI.getFlags();
9106 LLT Ty = MRI.getType(DstReg);
9113 auto Trunc =
MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9114 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9117 SrcReg, Zero, Flags);
9119 SrcReg, Trunc, Flags);
9123 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9124 MI.eraseFromParent();
9130 const unsigned NumOps =
MI.getNumOperands();
9131 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
9132 unsigned PartSize = Src0Ty.getSizeInBits();
9137 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
9138 const unsigned Offset = (
I - 1) * PartSize;
9141 auto ZextInput =
MIRBuilder.buildZExt(WideTy, SrcReg);
9144 MRI.createGenericVirtualRegister(WideTy);
9147 auto Shl =
MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9148 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9149 ResultReg = NextResult;
9152 if (DstTy.isPointer()) {
9153 if (
MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9154 DstTy.getAddressSpace())) {
9162 MI.eraseFromParent();
9168 const unsigned NumDst =
MI.getNumOperands() - 1;
9169 Register SrcReg =
MI.getOperand(NumDst).getReg();
9170 Register Dst0Reg =
MI.getOperand(0).getReg();
9171 LLT DstTy = MRI.getType(Dst0Reg);
9180 LLT IntTy = MRI.getType(SrcReg);
9185 unsigned Offset = DstSize;
9186 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
9188 auto Shift =
MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9192 MI.eraseFromParent();
9211 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9212 InsertVal =
MI.getOperand(2).getReg();
9214 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
9216 LLT VecTy = MRI.getType(SrcVec);
9226 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
9227 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9229 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9232 MI.eraseFromParent();
9237 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
9248 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9255 int64_t
Offset = IdxVal * EltBytes;
9266 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9269 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9271 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9274 MI.eraseFromParent();
9280 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9281 MI.getFirst3RegLLTs();
9291 for (
int Idx : Mask) {
9293 if (!
Undef.isValid())
9299 assert(!Src0Ty.isScalar() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9301 int NumElts = Src0Ty.getNumElements();
9302 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9303 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9304 auto [It, Inserted] = CachedExtract.
try_emplace(Idx);
9306 auto IdxK =
MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9308 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9313 assert(DstTy.isVector() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9314 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9315 MI.eraseFromParent();
9321 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9322 MI.getFirst4RegLLTs();
9324 if (VecTy.isScalableVector())
9340 auto OutPos =
MIRBuilder.buildConstant(IdxTy, 0);
9343 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9346 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9349 std::optional<APInt> PassthruSplatVal =
9352 if (PassthruSplatVal.has_value()) {
9354 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9355 }
else if (HasPassthru) {
9356 auto Popcount =
MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9357 Popcount =
MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9363 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9367 unsigned NumElmts = VecTy.getNumElements();
9368 for (
unsigned I = 0;
I < NumElmts; ++
I) {
9370 auto Val =
MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9373 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9376 auto MaskI =
MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9381 OutPos =
MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9383 if (HasPassthru &&
I == NumElmts - 1) {
9386 auto AllLanesSelected =
MIRBuilder.buildICmp(
9388 OutPos =
MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9389 {OutPos, EndOfVector});
9393 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9395 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9400 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9402 MI.eraseFromParent();
9413 SPTmp =
MIRBuilder.buildCast(IntPtrTy, SPTmp);
9419 if (Alignment >
Align(1)) {
9422 auto AlignCst =
MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9431 const auto &MF = *
MI.getMF();
9437 Register AllocSize =
MI.getOperand(1).getReg();
9440 LLT PtrTy = MRI.getType(Dst);
9441 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9448 MI.eraseFromParent();
9454 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9459 MI.eraseFromParent();
9465 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9470 MI.eraseFromParent();
9476 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9477 unsigned Offset =
MI.getOperand(2).getImm();
9480 if (SrcTy.isVector()) {
9481 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9482 unsigned DstSize = DstTy.getSizeInBits();
9484 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9485 (
Offset + DstSize <= SrcTy.getSizeInBits())) {
9487 auto Unmerge =
MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9491 for (
unsigned Idx =
Offset / SrcEltSize;
9492 Idx < (
Offset + DstSize) / SrcEltSize; ++Idx) {
9493 SubVectorElts.
push_back(Unmerge.getReg(Idx));
9495 if (SubVectorElts.
size() == 1)
9496 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9498 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9500 MI.eraseFromParent();
9506 if ((SrcTy.isPointer() &&
9507 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9508 (DstTy.isPointer() &&
9509 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9510 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9514 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9515 (SrcTy.isScalar() || SrcTy.isPointer() ||
9516 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9517 LLT SrcIntTy = SrcTy;
9518 if (!SrcTy.isScalar()) {
9520 SrcReg =
MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9524 if (DstTy.isPointer())
9526 MRI.createGenericVirtualRegister(
LLT::scalar(DstTy.getSizeInBits()));
9532 auto Shr =
MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9536 if (DstTy.isPointer())
9539 MI.eraseFromParent();
9547 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
9550 LLT DstTy = MRI.getType(Src);
9551 LLT InsertTy = MRI.getType(InsertSrc);
9554 bool IsNonIntegralInsert =
9564 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9565 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9572 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9574 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, Src);
9578 for (; Idx <
Offset / EltSize; ++Idx) {
9579 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9584 auto UnmergeInsertSrc =
MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9585 for (
unsigned i = 0; Idx < (
Offset + InsertSize) / EltSize;
9587 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
9591 InsertSrc =
MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9593 InsertSrc =
MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9600 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9603 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9604 MI.eraseFromParent();
9613 if (IsNonIntegralDst || IsNonIntegralInsert) {
9614 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9618 LLT IntDstTy = DstTy;
9622 Src =
MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9627 InsertSrc =
MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9633 ExtInsSrc =
MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9639 auto Mask =
MIRBuilder.buildConstant(IntDstTy, MaskVal);
9640 auto MaskedSrc =
MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9641 auto Or =
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9644 MI.eraseFromParent();
9650 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9651 MI.getFirst4RegLLTs();
9652 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
9655 LLT BoolTy = Dst1Ty;
9657 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9674 auto ResultLowerThanLHS =
9679 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9682 MI.eraseFromParent();
9688 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9689 const LLT Ty = MRI.getType(Res);
9692 auto Tmp =
MIRBuilder.buildAdd(Ty, LHS, RHS);
9693 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9694 auto Sum =
MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9705 MI.eraseFromParent();
9710 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9711 const LLT Ty = MRI.getType(Res);
9714 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9715 auto RHSPlusCI =
MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9716 auto Diff =
MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9721 auto X2 =
MIRBuilder.buildXor(Ty, LHS, Diff);
9726 MI.eraseFromParent();
9732 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9733 LLT Ty = MRI.getType(Res);
9737 switch (
MI.getOpcode()) {
9740 case TargetOpcode::G_UADDSAT:
9743 BaseOp = TargetOpcode::G_ADD;
9745 case TargetOpcode::G_SADDSAT:
9748 BaseOp = TargetOpcode::G_ADD;
9750 case TargetOpcode::G_USUBSAT:
9753 BaseOp = TargetOpcode::G_SUB;
9755 case TargetOpcode::G_SSUBSAT:
9758 BaseOp = TargetOpcode::G_SUB;
9773 uint64_t NumBits = Ty.getScalarSizeInBits();
9784 auto NegOne =
MIRBuilder.buildConstant(Ty, -1);
9792 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9797 auto Min =
MIRBuilder.buildUMin(Ty, Not, RHS);
9798 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9801 MI.eraseFromParent();
9807 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9808 LLT Ty = MRI.getType(Res);
9812 unsigned OverflowOp;
9813 switch (
MI.getOpcode()) {
9816 case TargetOpcode::G_UADDSAT:
9819 OverflowOp = TargetOpcode::G_UADDO;
9821 case TargetOpcode::G_SADDSAT:
9824 OverflowOp = TargetOpcode::G_SADDO;
9826 case TargetOpcode::G_USUBSAT:
9829 OverflowOp = TargetOpcode::G_USUBO;
9831 case TargetOpcode::G_SSUBSAT:
9834 OverflowOp = TargetOpcode::G_SSUBO;
9839 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9840 Register Tmp = OverflowRes.getReg(0);
9841 Register Ov = OverflowRes.getReg(1);
9850 uint64_t NumBits = Ty.getScalarSizeInBits();
9851 auto ShiftAmount =
MIRBuilder.buildConstant(Ty, NumBits - 1);
9852 auto Sign =
MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9855 Clamp =
MIRBuilder.buildAdd(Ty, Sign, MinVal);
9863 Clamp =
MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9867 MI.eraseFromParent();
9873 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9874 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9875 "Expected shlsat opcode!");
9876 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9877 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9878 LLT Ty = MRI.getType(Res);
9882 auto Result =
MIRBuilder.buildShl(Ty, LHS, RHS);
9883 auto Orig = IsSigned ?
MIRBuilder.buildAShr(Ty, Result, RHS)
9892 SatVal =
MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9897 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9899 MI.eraseFromParent();
9904 auto [Dst, Src] =
MI.getFirst2Regs();
9905 const LLT Ty = MRI.getType(Src);
9906 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9907 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9910 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9911 auto LSByteShiftedLeft =
MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9912 auto MSByteShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9913 auto Res =
MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9916 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
9918 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9919 auto Mask =
MIRBuilder.buildConstant(Ty, APMask);
9920 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9922 auto LoByte =
MIRBuilder.buildAnd(Ty, Src, Mask);
9923 auto LoShiftedLeft =
MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9924 Res =
MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9926 auto SrcShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9927 auto HiShiftedRight =
MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9928 Res =
MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9930 Res.getInstr()->getOperand(0).setReg(Dst);
9932 MI.eraseFromParent();
9939 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
9942 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9943 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9944 return B.buildOr(Dst,
LHS,
RHS);
9949 auto [Dst, Src] =
MI.getFirst2Regs();
9950 const LLT SrcTy = MRI.getType(Src);
9951 unsigned Size = SrcTy.getScalarSizeInBits();
9952 unsigned VSize = SrcTy.getSizeInBits();
9955 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9956 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9957 {LLT::fixed_vector(VSize / 8, 8),
9958 LLT::fixed_vector(VSize / 8, 8)}}))) {
9963 auto BSWAP =
MIRBuilder.buildBSwap(SrcTy, Src);
9964 auto Cast =
MIRBuilder.buildBitcast(VTy, BSWAP);
9965 auto RBIT =
MIRBuilder.buildBitReverse(VTy, Cast);
9969 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9992 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
9996 Tmp2 = MIRBuilder.
buildShl(SrcTy, Src, ShAmt);
9999 Tmp2 = MIRBuilder.
buildLShr(SrcTy, Src, ShAmt);
10003 Tmp2 = MIRBuilder.
buildAnd(SrcTy, Tmp2, Mask);
10007 Tmp = MIRBuilder.
buildOr(SrcTy, Tmp, Tmp2);
10012 MI.eraseFromParent();
10020 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
10021 int NameOpIdx = IsRead ? 1 : 0;
10022 int ValRegIndex = IsRead ? 0 : 1;
10024 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
10025 const LLT Ty = MRI.getType(ValReg);
10027 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
10034 (IsRead ?
"llvm.read_register" :
"llvm.write_register"),
10035 Fn,
MI.getDebugLoc()));
10039 MI.eraseFromParent();
10048 MI.eraseFromParent();
10054 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
10055 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10056 Register Result =
MI.getOperand(0).getReg();
10057 LLT OrigTy = MRI.getType(Result);
10061 auto LHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(1)});
10062 auto RHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(2)});
10064 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10066 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, SizeInBits);
10067 auto Shifted =
MIRBuilder.buildInstr(ShiftOp, {WideTy}, {
Mul, ShiftAmt});
10070 MI.eraseFromParent();
10076 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10081 MI.eraseFromParent();
10086 MI.eraseFromParent();
10093 unsigned BitSize = SrcTy.getScalarSizeInBits();
10097 auto AsInt =
MIRBuilder.buildCopy(IntTy, SrcReg);
10103 APInt ExpMask = Inf;
10105 APInt QNaNBitMask =
10109 auto SignBitC =
MIRBuilder.buildConstant(IntTy, SignBit);
10110 auto ValueMaskC =
MIRBuilder.buildConstant(IntTy, ValueMask);
10111 auto InfC =
MIRBuilder.buildConstant(IntTy, Inf);
10112 auto ExpMaskC =
MIRBuilder.buildConstant(IntTy, ExpMask);
10113 auto ZeroC =
MIRBuilder.buildConstant(IntTy, 0);
10115 auto Abs =
MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10119 auto Res =
MIRBuilder.buildConstant(DstTy, 0);
10121 LLT DstTyCopy = DstTy;
10123 Res =
MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10151 auto ExpBits =
MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10154 Mask &= ~PartialCheck;
10163 else if (PartialCheck ==
fcZero)
10175 auto OneC =
MIRBuilder.buildConstant(IntTy, 1);
10176 auto VMinusOne =
MIRBuilder.buildSub(IntTy, V, OneC);
10177 auto SubnormalRes =
10179 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10181 SubnormalRes =
MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10182 appendToRes(SubnormalRes);
10189 else if (PartialCheck ==
fcInf)
10194 auto NegInfC =
MIRBuilder.buildConstant(IntTy, NegInf);
10201 auto InfWithQnanBitC =
MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10202 if (PartialCheck ==
fcNan) {
10206 }
else if (PartialCheck ==
fcQNan) {
10216 Abs, InfWithQnanBitC);
10217 appendToRes(
MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10224 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
10226 IntTy, Abs,
MIRBuilder.buildConstant(IntTy, ExpLSB));
10227 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10230 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10232 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10235 DstTy, Sign,
MIRBuilder.buildConstant(DstTy, InversionMask));
10236 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10238 appendToRes(NormalRes);
10242 MI.eraseFromParent();
10248 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10249 MI.getFirst4RegLLTs();
10258 Op1Reg =
MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10259 Op1Ty = MRI.getType(Op1Reg);
10260 Op2Reg =
MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10261 Op2Ty = MRI.getType(Op2Reg);
10265 if (MaskTy.isScalar()) {
10273 MaskElt =
MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10277 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10279 if (DstTy.isVector()) {
10281 auto ShufSplat =
MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10282 MaskReg = ShufSplat.getReg(0);
10287 }
else if (!DstTy.isVector()) {
10292 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10296 if (!(Op1Ty.getScalarType().isAnyScalar() ||
10297 Op1Ty.getScalarType().isInteger()))
10298 Op1Reg =
MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);
10300 if (!(Op2Ty.getScalarType().isAnyScalar() ||
10301 Op2Ty.getScalarType().isInteger())) {
10303 Op2Ty.changeElementType(
LLT::integer(Op2Ty.getScalarSizeInBits()));
10304 Op2Reg =
MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
10307 auto NotMask =
MIRBuilder.buildNot(MaskTy, MaskReg);
10308 auto NewOp1 =
MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10309 auto NewOp2 =
MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10314 if (DstTy == Op1TyInt)
10317 auto Or =
MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
10321 MI.eraseFromParent();
10327 unsigned Opcode =
MI.getOpcode();
10330 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10331 : TargetOpcode::G_UDIV,
10332 {
MI.getOperand(0).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10334 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10335 : TargetOpcode::G_UREM,
10336 {
MI.getOperand(1).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10337 MI.eraseFromParent();
10347 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
10351 auto Shift =
MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10354 MI.eraseFromParent();
10364 Register SrcReg =
MI.getOperand(1).getReg();
10365 LLT Ty = MRI.getType(SrcReg);
10366 auto Zero =
MIRBuilder.buildConstant(Ty, 0);
10369 MI.eraseFromParent();
10375 Register SrcReg =
MI.getOperand(1).getReg();
10376 Register DestReg =
MI.getOperand(0).getReg();
10378 auto Zero =
MIRBuilder.buildConstant(Ty, 0).getReg(0);
10379 auto Sub =
MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10382 MI.eraseFromParent();
10388 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10389 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10390 "Expected G_ABDS or G_ABDU instruction");
10392 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10393 LLT Ty = MRI.getType(LHS);
10403 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10405 MI.eraseFromParent();
10411 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10412 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10413 "Expected G_ABDS or G_ABDU instruction");
10415 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10416 LLT Ty = MRI.getType(LHS);
10421 if (
MI.getOpcode() == TargetOpcode::G_ABDS) {
10422 MaxReg =
MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10423 MinReg =
MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10425 MaxReg =
MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10426 MinReg =
MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10428 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10430 MI.eraseFromParent();
10435 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10440 if (!(SrcTy.getScalarType().isAnyScalar() ||
10441 SrcTy.getScalarType().isInteger())) {
10443 SrcTy.changeElementType(
LLT::integer(SrcTy.getScalarSizeInBits()));
10444 CastedSrc =
MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
10447 if (MRI.getType(DstReg) != TyInt) {
10451 .buildAnd(TyInt, CastedSrc,
10454 DstTy.getScalarSizeInBits())))
10466 MI.eraseFromParent();
10472 Register SrcReg =
MI.getOperand(1).getReg();
10473 LLT SrcTy = MRI.getType(SrcReg);
10474 LLT DstTy = MRI.getType(SrcReg);
10477 if (SrcTy.isScalar()) {
10482 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::COPY));
10493 Register ListPtr =
MI.getOperand(1).getReg();
10494 LLT PtrTy = MRI.getType(ListPtr);
10501 auto VAList =
MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10503 const Align A(
MI.getOperand(2).getImm());
10505 if (
A > TLI.getMinStackArgumentAlignment()) {
10507 MIRBuilder.buildConstant(PtrTyAsScalarTy,
A.value() - 1).getReg(0);
10508 auto AddDst =
MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10509 auto AndDst =
MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst,
Log2(
A));
10510 VAList = AndDst.getReg(0);
10517 LLT LLTTy = MRI.getType(Dst);
10520 MIRBuilder.buildConstant(PtrTyAsScalarTy,
DL.getTypeAllocSize(Ty));
10521 auto Succ =
MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10526 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10528 Align EltAlignment =
DL.getABITypeAlign(Ty);
10531 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10533 MI.eraseFromParent();
10548 unsigned Limit,
const MemOp &
Op,
10549 unsigned DstAS,
unsigned SrcAS,
10550 const AttributeList &FuncAttributes,
10552 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
10562 if (
Op.isFixedDstAlign())
10563 while (
Op.getDstAlign() < Ty.getSizeInBytes() &&
10566 assert(Ty.getSizeInBits() > 0 &&
"Could not find valid type");
10570 unsigned NumMemOps = 0;
10573 unsigned TySize = Ty.getSizeInBytes();
10574 while (TySize >
Size) {
10583 assert(NewTySize > 0 &&
"Could not find appropriate type");
10590 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
10592 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
10598 TySize = NewTySize;
10602 if (++NumMemOps > Limit)
10605 MemOps.push_back(Ty);
10615 unsigned NumBits = Ty.getScalarSizeInBits();
10617 if (!Ty.isVector() && ValVRegAndVal) {
10618 APInt Scalar = ValVRegAndVal->Value.
trunc(8);
10626 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10647 uint64_t KnownLen,
Align Alignment,
10649 auto &MF = *
MI.getParent()->getParent();
10654 assert(KnownLen != 0 &&
"Have a zero length memset length!");
10656 bool DstAlignCanChange =
false;
10660 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10662 DstAlignCanChange =
true;
10664 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10665 std::vector<LLT> MemOps;
10667 const auto &DstMMO = **
MI.memoperands_begin();
10668 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10671 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10682 if (DstAlignCanChange) {
10685 Align NewAlign =
DL.getABITypeAlign(IRTy);
10686 if (NewAlign > Alignment) {
10687 Alignment = NewAlign;
10695 MachineIRBuilder MIB(
MI);
10697 LLT LargestTy = MemOps[0];
10698 for (
unsigned i = 1; i < MemOps.size(); i++)
10700 LargestTy = MemOps[i];
10712 LLT PtrTy = MRI.getType(Dst);
10713 unsigned DstOff = 0;
10714 unsigned Size = KnownLen;
10715 for (
unsigned I = 0;
I < MemOps.size();
I++) {
10716 LLT Ty = MemOps[
I];
10718 if (TySize >
Size) {
10721 assert(
I == MemOps.size() - 1 &&
I != 0);
10722 DstOff -= TySize -
Size;
10732 TLI.isTruncateFree(LargestVT, VT))
10733 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10746 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst,
Offset).getReg(0);
10749 MIB.buildStore(
Value, Ptr, *StoreMMO);
10754 MI.eraseFromParent();
10760 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10762 auto [Dst, Src, Len] =
MI.getFirst3Regs();
10764 const auto *MMOIt =
MI.memoperands_begin();
10766 bool IsVolatile =
MemOp->isVolatile();
10772 "inline memcpy with dynamic size is not yet supported");
10773 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10774 if (KnownLen == 0) {
10775 MI.eraseFromParent();
10779 const auto &DstMMO = **
MI.memoperands_begin();
10780 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10781 Align DstAlign = DstMMO.getBaseAlign();
10782 Align SrcAlign = SrcMMO.getBaseAlign();
10784 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10791 Align SrcAlign,
bool IsVolatile) {
10792 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10793 return lowerMemcpy(
MI, Dst, Src, KnownLen,
10794 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10801 Align SrcAlign,
bool IsVolatile) {
10802 auto &MF = *
MI.getParent()->getParent();
10807 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
10809 bool DstAlignCanChange =
false;
10811 Align Alignment = std::min(DstAlign, SrcAlign);
10815 DstAlignCanChange =
true;
10821 std::vector<LLT> MemOps;
10823 const auto &DstMMO = **
MI.memoperands_begin();
10824 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10830 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10836 if (DstAlignCanChange) {
10839 Align NewAlign =
DL.getABITypeAlign(IRTy);
10844 if (!
TRI->hasStackRealignment(MF))
10846 NewAlign = std::min(NewAlign, *StackAlign);
10848 if (NewAlign > Alignment) {
10849 Alignment = NewAlign;
10857 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
10859 MachineIRBuilder MIB(
MI);
10865 unsigned CurrOffset = 0;
10866 unsigned Size = KnownLen;
10867 for (
auto CopyTy : MemOps) {
10870 if (CopyTy.getSizeInBytes() >
Size)
10871 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
10882 if (CurrOffset != 0) {
10883 LLT SrcTy = MRI.getType(Src);
10886 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
10888 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10892 if (CurrOffset != 0) {
10893 LLT DstTy = MRI.getType(Dst);
10894 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
10896 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10897 CurrOffset += CopyTy.getSizeInBytes();
10898 Size -= CopyTy.getSizeInBytes();
10901 MI.eraseFromParent();
10907 uint64_t KnownLen,
Align DstAlign,
Align SrcAlign,
10909 auto &MF = *
MI.getParent()->getParent();
10914 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
10916 bool DstAlignCanChange =
false;
10919 Align Alignment = std::min(DstAlign, SrcAlign);
10921 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10923 DstAlignCanChange =
true;
10925 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10926 std::vector<LLT> MemOps;
10928 const auto &DstMMO = **
MI.memoperands_begin();
10929 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10930 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10931 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10938 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10944 if (DstAlignCanChange) {
10947 Align NewAlign =
DL.getABITypeAlign(IRTy);
10952 if (!
TRI->hasStackRealignment(MF))
10953 if (MaybeAlign StackAlign =
DL.getStackAlignment())
10954 NewAlign = std::min(NewAlign, *StackAlign);
10956 if (NewAlign > Alignment) {
10957 Alignment = NewAlign;
10965 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
10967 MachineIRBuilder MIB(
MI);
10971 unsigned CurrOffset = 0;
10972 SmallVector<Register, 16> LoadVals;
10973 for (
auto CopyTy : MemOps) {
10980 if (CurrOffset != 0) {
10981 LLT SrcTy = MRI.getType(Src);
10984 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
10986 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10987 CurrOffset += CopyTy.getSizeInBytes();
10991 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
10992 LLT CopyTy = MemOps[
I];
10998 if (CurrOffset != 0) {
10999 LLT DstTy = MRI.getType(Dst);
11002 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11004 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
11007 MI.eraseFromParent();
11013 const unsigned Opc =
MI.getOpcode();
11016 assert((
Opc == TargetOpcode::G_MEMCPY ||
Opc == TargetOpcode::G_MEMMOVE ||
11017 Opc == TargetOpcode::G_MEMSET) &&
11018 "Expected memcpy like instruction");
11020 auto MMOIt =
MI.memoperands_begin();
11025 auto [Dst, Src, Len] =
MI.getFirst3Regs();
11027 if (
Opc != TargetOpcode::G_MEMSET) {
11028 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
11029 MemOp = *(++MMOIt);
11030 SrcAlign =
MemOp->getBaseAlign();
11035 if (!LenVRegAndVal)
11037 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
11039 if (KnownLen == 0) {
11040 MI.eraseFromParent();
11044 if (MaxLen && KnownLen > MaxLen)
11047 bool IsVolatile =
MemOp->isVolatile();
11048 if (
Opc == TargetOpcode::G_MEMCPY) {
11049 auto &MF = *
MI.getParent()->getParent();
11052 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
11053 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
11056 if (
Opc == TargetOpcode::G_MEMMOVE)
11057 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
11058 if (
Opc == TargetOpcode::G_MEMSET)
11059 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents an insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
StringRef - Represent a constant reference to a string, i.e.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for primitive integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector&lt;T, 0&gt;).
bool isa(const From &Val)
isa&lt;X&gt; - Return true if the parameter to the template is an instance of one of the template type arguments.
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
@ Custom
The result value requires a custom uniformity check.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
This class contains a discriminated union of information about pointers in memory operands, relating them back to the original IR value.
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.