44#define DEBUG_TYPE "legalizer"
57static std::pair<int, int>
63 unsigned NumParts =
Size / NarrowSize;
64 unsigned LeftoverSize =
Size - NumParts * NarrowSize;
67 if (LeftoverSize == 0)
72 if (LeftoverSize % EltSize != 0)
81 return std::make_pair(NumParts, NumLeftover);
89 switch (Ty.getSizeInBits()) {
130 auto Step = LI.getAction(
MI, MRI);
131 switch (Step.Action) {
146 return bitcast(
MI, Step.TypeIdx, Step.NewType);
149 return lower(
MI, Step.TypeIdx, Step.NewType);
158 return LI.legalizeCustom(*
this,
MI, LocObserver) ?
Legalized
166void LegalizerHelper::insertParts(
Register DstReg,
188 assert(LeftoverRegs.
size() == 1 &&
"Expected one leftover register");
190 AllRegs.append(LeftoverRegs.
begin(), LeftoverRegs.
end());
191 return mergeMixedSubvectors(DstReg, AllRegs);
197 extractGCDType(GCDRegs, GCDTy, PartReg);
198 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
199 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
204 LLT Ty = MRI.getType(
Reg);
212void LegalizerHelper::mergeMixedSubvectors(
Register DstReg,
215 for (
unsigned i = 0; i < PartRegs.
size() - 1; ++i)
216 appendVectorElts(AllElts, PartRegs[i]);
219 if (!MRI.getType(Leftover).isVector())
222 appendVectorElts(AllElts, Leftover);
224 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
230 assert(
MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
232 const int StartIdx = Regs.
size();
233 const int NumResults =
MI.getNumOperands() - 1;
235 for (
int I = 0;
I != NumResults; ++
I)
236 Regs[StartIdx +
I] =
MI.getOperand(
I).getReg();
241 LLT SrcTy = MRI.getType(SrcReg);
242 if (SrcTy == GCDTy) {
248 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
255 LLT SrcTy = MRI.getType(SrcReg);
257 extractGCDType(Parts, GCDTy, SrcReg);
261LLT LegalizerHelper::buildLCMMergePieces(
LLT DstTy,
LLT NarrowTy,
LLT GCDTy,
263 unsigned PadStrategy) {
268 int NumOrigSrc = VRegs.
size();
274 if (NumOrigSrc < NumParts * NumSubParts) {
275 if (PadStrategy == TargetOpcode::G_ZEXT)
276 PadReg =
MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
277 else if (PadStrategy == TargetOpcode::G_ANYEXT)
278 PadReg =
MIRBuilder.buildUndef(GCDTy).getReg(0);
280 assert(PadStrategy == TargetOpcode::G_SEXT);
285 PadReg =
MIRBuilder.buildAShr(GCDTy, VRegs.
back(), ShiftAmt).getReg(0);
301 for (
int I = 0;
I != NumParts; ++
I) {
302 bool AllMergePartsArePadding =
true;
305 for (
int J = 0; J != NumSubParts; ++J) {
306 int Idx =
I * NumSubParts + J;
307 if (Idx >= NumOrigSrc) {
308 SubMerge[J] = PadReg;
312 SubMerge[J] = VRegs[Idx];
315 AllMergePartsArePadding =
false;
321 if (AllMergePartsArePadding && !AllPadReg) {
322 if (PadStrategy == TargetOpcode::G_ANYEXT)
323 AllPadReg =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
324 else if (PadStrategy == TargetOpcode::G_ZEXT)
325 AllPadReg =
MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
334 Remerge[
I] = AllPadReg;
338 if (NumSubParts == 1)
339 Remerge[
I] = SubMerge[0];
341 Remerge[
I] =
MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
344 if (AllMergePartsArePadding && !AllPadReg)
345 AllPadReg = Remerge[
I];
348 VRegs = std::move(Remerge);
352void LegalizerHelper::buildWidenedRemergeToDst(
Register DstReg,
LLT LCMTy,
354 LLT DstTy = MRI.getType(DstReg);
359 if (DstTy == LCMTy) {
360 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
364 auto Remerge =
MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
373 UnmergeDefs[0] = DstReg;
374 for (
unsigned I = 1;
I != NumDefs; ++
I)
375 UnmergeDefs[
I] = MRI.createGenericVirtualRegister(DstTy);
378 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
386#define RTLIBCASE_INT(LibcallPrefix) \
390 return RTLIB::LibcallPrefix##32; \
392 return RTLIB::LibcallPrefix##64; \
394 return RTLIB::LibcallPrefix##128; \
396 llvm_unreachable("unexpected size"); \
400#define RTLIBCASE(LibcallPrefix) \
404 return RTLIB::LibcallPrefix##32; \
406 return RTLIB::LibcallPrefix##64; \
408 return RTLIB::LibcallPrefix##80; \
410 return RTLIB::LibcallPrefix##128; \
412 llvm_unreachable("unexpected size"); \
417 case TargetOpcode::G_LROUND:
419 case TargetOpcode::G_LLROUND:
421 case TargetOpcode::G_MUL:
423 case TargetOpcode::G_SDIV:
425 case TargetOpcode::G_UDIV:
427 case TargetOpcode::G_SREM:
429 case TargetOpcode::G_UREM:
431 case TargetOpcode::G_CTLZ_ZERO_POISON:
433 case TargetOpcode::G_FADD:
435 case TargetOpcode::G_FSUB:
437 case TargetOpcode::G_FMUL:
439 case TargetOpcode::G_FDIV:
441 case TargetOpcode::G_FEXP:
443 case TargetOpcode::G_FEXP2:
445 case TargetOpcode::G_FEXP10:
447 case TargetOpcode::G_FREM:
449 case TargetOpcode::G_FPOW:
451 case TargetOpcode::G_FPOWI:
453 case TargetOpcode::G_FMA:
455 case TargetOpcode::G_FSIN:
457 case TargetOpcode::G_FCOS:
459 case TargetOpcode::G_FTAN:
461 case TargetOpcode::G_FASIN:
463 case TargetOpcode::G_FACOS:
465 case TargetOpcode::G_FATAN:
467 case TargetOpcode::G_FATAN2:
469 case TargetOpcode::G_FSINH:
471 case TargetOpcode::G_FCOSH:
473 case TargetOpcode::G_FTANH:
475 case TargetOpcode::G_FSINCOS:
477 case TargetOpcode::G_FMODF:
479 case TargetOpcode::G_FLOG10:
481 case TargetOpcode::G_FLOG:
483 case TargetOpcode::G_FLOG2:
485 case TargetOpcode::G_FLDEXP:
487 case TargetOpcode::G_FCEIL:
489 case TargetOpcode::G_FFLOOR:
491 case TargetOpcode::G_FMINNUM:
493 case TargetOpcode::G_FMAXNUM:
495 case TargetOpcode::G_FMINIMUMNUM:
497 case TargetOpcode::G_FMAXIMUMNUM:
499 case TargetOpcode::G_FSQRT:
501 case TargetOpcode::G_FRINT:
503 case TargetOpcode::G_FNEARBYINT:
505 case TargetOpcode::G_INTRINSIC_TRUNC:
507 case TargetOpcode::G_INTRINSIC_ROUND:
509 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
511 case TargetOpcode::G_INTRINSIC_LRINT:
513 case TargetOpcode::G_INTRINSIC_LLRINT:
533 AttributeList CallerAttrs =
F.getAttributes();
534 if (AttrBuilder(
F.getContext(), CallerAttrs.getRetAttrs())
535 .removeAttribute(Attribute::NoAlias)
536 .removeAttribute(Attribute::NonNull)
541 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
542 CallerAttrs.hasRetAttr(Attribute::SExt))
553 if (
MI.getOpcode() == TargetOpcode::G_BZERO)
560 if (!VReg.
isVirtual() || VReg !=
Next->getOperand(1).getReg())
568 if (Ret ==
MBB.instr_end() || !Ret->isReturn())
571 if (Ret->getNumImplicitOperands() != 1)
574 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
591 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
596 Info.OrigRet = Result;
599 (Result.Ty->isVoidTy() ||
600 Result.Ty ==
MIRBuilder.getMF().getFunction().getReturnType()) &&
608 if (
MI && Info.LoweredTailCall) {
609 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
619 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
620 "Expected instr following MI to be return or debug inst?");
623 Next->eraseFromParent();
624 }
while (
MI->getNextNode());
639 RTLIB::LibcallImpl LibcallImpl = Libcalls->getLibcallImpl(
Libcall);
640 if (LibcallImpl == RTLIB::Unsupported)
644 const CallingConv::ID CC = Libcalls->getLibcallImplCallingConv(LibcallImpl);
658 Args.push_back({MO.getReg(), OpType, 0});
677 unsigned AddrSpace =
DL.getAllocaAddrSpace();
695 if (LibcallResult != LegalizeResult::Legalized)
703 MIRBuilder.
buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
704 MIRBuilder.
buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
705 MI.eraseFromParent();
720 LLT DstTy = MRI.getType(DstFrac);
725 unsigned AddrSpace =
DL.getAllocaAddrSpace();
726 MachinePointerInfo PtrInfo;
735 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
738 if (LibcallResult != LegalizeResult::Legalized)
744 MIRBuilder.
buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
745 MI.eraseFromParent();
756 case TargetOpcode::G_FPEXT:
758 case TargetOpcode::G_FPTRUNC:
760 case TargetOpcode::G_FPTOSI:
762 case TargetOpcode::G_FPTOUI:
764 case TargetOpcode::G_SITOFP:
766 case TargetOpcode::G_UITOFP:
776 if (FromType->isIntegerTy()) {
777 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
778 Arg.
Flags[0].setSExt();
780 Arg.
Flags[0].setZExt();
791 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
795 for (
unsigned i = 0; i <
MI.getNumOperands() - 1; ++i) {
799 LLT OpLLT = MRI.getType(Reg);
800 Type *OpTy =
nullptr;
805 Args.push_back({Reg, OpTy, 0});
808 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
809 RTLIB::Libcall RTLibcall;
810 unsigned Opc =
MI.getOpcode();
812 case TargetOpcode::G_BZERO:
813 RTLibcall = RTLIB::BZERO;
815 case TargetOpcode::G_MEMCPY:
816 RTLibcall = RTLIB::MEMCPY;
817 Args[0].Flags[0].setReturned();
819 case TargetOpcode::G_MEMMOVE:
820 RTLibcall = RTLIB::MEMMOVE;
821 Args[0].Flags[0].setReturned();
823 case TargetOpcode::G_MEMSET:
824 RTLibcall = RTLIB::MEMSET;
825 Args[0].Flags[0].setReturned();
834 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
837 if (RTLibcallImpl == RTLIB::Unsupported) {
844 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
851 MI.getOperand(
MI.getNumOperands() - 1).getImm() &&
858 if (Info.LoweredTailCall) {
859 assert(Info.IsTailCall &&
"Lowered tail call when it wasn't a tail call?");
869 (
Next->isCopy() ||
Next->isReturn() ||
Next->isDebugInstr()) &&
870 "Expected instr following MI to be return or debug inst?");
873 Next->eraseFromParent();
874 }
while (
MI.getNextNode());
884 unsigned Opc =
MI.getOpcode();
886 auto &MMO = AtomicMI.getMMO();
887 auto Ordering = MMO.getMergedOrdering();
888 LLT MemType = MMO.getMemoryType();
891 return RTLIB::UNKNOWN_LIBCALL;
893#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
895 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
897 case TargetOpcode::G_ATOMIC_CMPXCHG:
898 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
899 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
900 return getOutlineAtomicHelper(LC, Ordering, MemSize);
902 case TargetOpcode::G_ATOMICRMW_XCHG: {
903 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
904 return getOutlineAtomicHelper(LC, Ordering, MemSize);
906 case TargetOpcode::G_ATOMICRMW_ADD:
907 case TargetOpcode::G_ATOMICRMW_SUB: {
908 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
909 return getOutlineAtomicHelper(LC, Ordering, MemSize);
911 case TargetOpcode::G_ATOMICRMW_AND: {
912 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
913 return getOutlineAtomicHelper(LC, Ordering, MemSize);
915 case TargetOpcode::G_ATOMICRMW_OR: {
916 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
917 return getOutlineAtomicHelper(LC, Ordering, MemSize);
919 case TargetOpcode::G_ATOMICRMW_XOR: {
920 const RTLIB::Libcall LC[5][4] = {
LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
921 return getOutlineAtomicHelper(LC, Ordering, MemSize);
924 return RTLIB::UNKNOWN_LIBCALL;
937 unsigned Opc =
MI.getOpcode();
939 case TargetOpcode::G_ATOMIC_CMPXCHG:
940 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
943 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
944 MI.getFirst4RegLLTs();
947 if (
Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
948 std::tie(Ret, RetLLT,
Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
949 NewLLT) =
MI.getFirst5RegLLTs();
959 case TargetOpcode::G_ATOMICRMW_XCHG:
960 case TargetOpcode::G_ATOMICRMW_ADD:
961 case TargetOpcode::G_ATOMICRMW_SUB:
962 case TargetOpcode::G_ATOMICRMW_AND:
963 case TargetOpcode::G_ATOMICRMW_OR:
964 case TargetOpcode::G_ATOMICRMW_XOR: {
965 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] =
MI.getFirst3RegLLTs();
968 if (
Opc == TargetOpcode::G_ATOMICRMW_AND)
972 else if (
Opc == TargetOpcode::G_ATOMICRMW_SUB)
987 auto &CLI = *
MIRBuilder.getMF().getSubtarget().getCallLowering();
989 RTLIB::LibcallImpl RTLibcallImpl = Libcalls->getLibcallImpl(RTLibcall);
992 if (RTLibcallImpl == RTLIB::Unsupported) {
999 Info.
CallConv = Libcalls->getLibcallImplCallingConv(RTLibcallImpl);
1013static RTLIB::Libcall
1015 RTLIB::Libcall RTLibcall;
1016 switch (
MI.getOpcode()) {
1017 case TargetOpcode::G_GET_FPENV:
1018 RTLibcall = RTLIB::FEGETENV;
1020 case TargetOpcode::G_SET_FPENV:
1021 case TargetOpcode::G_RESET_FPENV:
1022 RTLibcall = RTLIB::FESETENV;
1024 case TargetOpcode::G_GET_FPMODE:
1025 RTLibcall = RTLIB::FEGETMODE;
1027 case TargetOpcode::G_SET_FPMODE:
1028 case TargetOpcode::G_RESET_FPMODE:
1029 RTLibcall = RTLIB::FESETMODE;
1061 LLT StateTy = MRI.getType(Dst);
1064 MachinePointerInfo TempPtrInfo;
1068 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1073 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}), LocObserver,
1081 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1099 LLT StateTy = MRI.getType(Src);
1102 MachinePointerInfo TempPtrInfo;
1111 unsigned TempAddrSpace =
DL.getAllocaAddrSpace();
1116 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1117 LocObserver,
nullptr);
1123static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1125#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1129 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1131 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1133 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1135 llvm_unreachable("unexpected size"); \
1166 LLT OpLLT = MRI.getType(
Cmp->getLHSReg());
1169 OpLLT != MRI.getType(
Cmp->getRHSReg()))
1176 LLT DstTy = MRI.getType(DstReg);
1177 const auto Cond =
Cmp->getCond();
1182 const auto BuildLibcall = [&](
const RTLIB::Libcall
Libcall,
1187 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1191 {{
Cmp->getLHSReg(), OpType, 0}, {
Cmp->getRHSReg(), OpType, 1}},
1198 .buildICmp(ICmpPred, Res, Temp,
MIRBuilder.buildConstant(TempLLT, 0))
1204 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1206 if (BuildLibcall(
Libcall, ICmpPred, DstReg)) {
1219 const auto [OeqLibcall, OeqPred] =
1221 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1223 const auto [UnoLibcall, UnoPred] =
1225 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1240 const auto [OeqLibcall, OeqPred] =
1245 const auto [UnoLibcall, UnoPred] =
1250 if (NotOeq && NotUno)
1269 const auto [InversedLibcall, InversedPred] =
1271 if (!BuildLibcall(InversedLibcall,
1296 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
1298 unsigned PtrSize =
DL.getPointerSizeInBits(AddrSpace);
1301 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1307 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}), LocObserver, &
MI);
1312 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
1314 switch (
MI.getOpcode()) {
1317 case TargetOpcode::G_MUL:
1318 case TargetOpcode::G_SDIV:
1319 case TargetOpcode::G_UDIV:
1320 case TargetOpcode::G_SREM:
1321 case TargetOpcode::G_UREM:
1322 case TargetOpcode::G_CTLZ_ZERO_POISON: {
1323 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1331 case TargetOpcode::G_FADD:
1332 case TargetOpcode::G_FSUB:
1333 case TargetOpcode::G_FMUL:
1334 case TargetOpcode::G_FDIV:
1335 case TargetOpcode::G_FMA:
1336 case TargetOpcode::G_FPOW:
1337 case TargetOpcode::G_FREM:
1338 case TargetOpcode::G_FCOS:
1339 case TargetOpcode::G_FSIN:
1340 case TargetOpcode::G_FTAN:
1341 case TargetOpcode::G_FACOS:
1342 case TargetOpcode::G_FASIN:
1343 case TargetOpcode::G_FATAN:
1344 case TargetOpcode::G_FATAN2:
1345 case TargetOpcode::G_FCOSH:
1346 case TargetOpcode::G_FSINH:
1347 case TargetOpcode::G_FTANH:
1348 case TargetOpcode::G_FLOG10:
1349 case TargetOpcode::G_FLOG:
1350 case TargetOpcode::G_FLOG2:
1351 case TargetOpcode::G_FEXP:
1352 case TargetOpcode::G_FEXP2:
1353 case TargetOpcode::G_FEXP10:
1354 case TargetOpcode::G_FCEIL:
1355 case TargetOpcode::G_FFLOOR:
1356 case TargetOpcode::G_FMINNUM:
1357 case TargetOpcode::G_FMAXNUM:
1358 case TargetOpcode::G_FMINIMUMNUM:
1359 case TargetOpcode::G_FMAXIMUMNUM:
1360 case TargetOpcode::G_FSQRT:
1361 case TargetOpcode::G_FRINT:
1362 case TargetOpcode::G_FNEARBYINT:
1363 case TargetOpcode::G_INTRINSIC_TRUNC:
1364 case TargetOpcode::G_INTRINSIC_ROUND:
1365 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1366 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1370 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1378 case TargetOpcode::G_FSINCOS: {
1379 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1383 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1388 case TargetOpcode::G_FMODF: {
1389 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1393 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1398 case TargetOpcode::G_LROUND:
1399 case TargetOpcode::G_LLROUND:
1400 case TargetOpcode::G_INTRINSIC_LRINT:
1401 case TargetOpcode::G_INTRINSIC_LLRINT: {
1402 LLT LLTy = MRI.getType(
MI.getOperand(1).getReg());
1406 Ctx, MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits());
1408 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1414 {{
MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &
MI);
1417 MI.eraseFromParent();
1420 case TargetOpcode::G_FPOWI:
1421 case TargetOpcode::G_FLDEXP: {
1422 LLT LLTy = MRI.getType(
MI.getOperand(0).getReg());
1426 Ctx, MRI.getType(
MI.getOperand(2).getReg()).getSizeInBits());
1428 LLVM_DEBUG(
dbgs() <<
"No libcall available for type " << LLTy <<
".\n");
1433 {
MI.getOperand(1).getReg(), HLTy, 0},
1434 {
MI.getOperand(2).getReg(), ITy, 1}};
1435 Args[1].Flags[0].setSExt();
1437 Libcall, {
MI.getOperand(0).getReg(), HLTy, 0}, Args, LocObserver, &
MI);
1442 case TargetOpcode::G_FPEXT:
1443 case TargetOpcode::G_FPTRUNC: {
1446 if (!FromTy || !ToTy)
1453 case TargetOpcode::G_FCMP: {
1457 MI.eraseFromParent();
1460 case TargetOpcode::G_FPTOSI:
1461 case TargetOpcode::G_FPTOUI: {
1465 unsigned ToSize = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1466 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1469 FromTy, LocObserver);
1474 case TargetOpcode::G_SITOFP:
1475 case TargetOpcode::G_UITOFP: {
1476 unsigned FromSize = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1479 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1481 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SITOFP;
1488 case TargetOpcode::G_ATOMICRMW_XCHG:
1489 case TargetOpcode::G_ATOMICRMW_ADD:
1490 case TargetOpcode::G_ATOMICRMW_SUB:
1491 case TargetOpcode::G_ATOMICRMW_AND:
1492 case TargetOpcode::G_ATOMICRMW_OR:
1493 case TargetOpcode::G_ATOMICRMW_XOR:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1501 case TargetOpcode::G_BZERO:
1502 case TargetOpcode::G_MEMCPY:
1503 case TargetOpcode::G_MEMMOVE:
1504 case TargetOpcode::G_MEMSET: {
1509 MI.eraseFromParent();
1512 case TargetOpcode::G_GET_FPENV:
1513 case TargetOpcode::G_GET_FPMODE: {
1519 case TargetOpcode::G_SET_FPENV:
1520 case TargetOpcode::G_SET_FPMODE: {
1526 case TargetOpcode::G_RESET_FPENV:
1527 case TargetOpcode::G_RESET_FPMODE: {
1535 MI.eraseFromParent();
1542 uint64_t SizeOp0 = MRI.getType(
MI.getOperand(0).getReg()).getSizeInBits();
1545 switch (
MI.getOpcode()) {
1548 case TargetOpcode::G_IMPLICIT_DEF: {
1550 LLT DstTy = MRI.getType(DstReg);
1558 if (SizeOp0 % NarrowSize != 0) {
1563 MI.eraseFromParent();
1567 int NumParts = SizeOp0 / NarrowSize;
1570 for (
int i = 0; i < NumParts; ++i)
1574 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1576 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1577 MI.eraseFromParent();
1580 case TargetOpcode::G_CONSTANT: {
1581 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1582 const APInt &Val =
MI.getOperand(1).getCImm()->getValue();
1583 unsigned TotalSize = Ty.getSizeInBits();
1585 int NumParts = TotalSize / NarrowSize;
1588 for (
int I = 0;
I != NumParts; ++
I) {
1589 unsigned Offset =
I * NarrowSize;
1596 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1598 if (LeftoverBits != 0) {
1602 Val.
lshr(NumParts * NarrowSize).
trunc(LeftoverBits));
1606 insertParts(
MI.getOperand(0).getReg(),
1607 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1609 MI.eraseFromParent();
1612 case TargetOpcode::G_SEXT:
1613 case TargetOpcode::G_ZEXT:
1614 case TargetOpcode::G_ANYEXT:
1616 case TargetOpcode::G_TRUNC: {
1620 uint64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
1622 LLVM_DEBUG(
dbgs() <<
"Can't narrow trunc to type " << NarrowTy <<
"\n");
1626 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
1627 MIRBuilder.buildCopy(
MI.getOperand(0), Unmerge.getReg(0));
1628 MI.eraseFromParent();
1631 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1632 case TargetOpcode::G_FREEZE: {
1636 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
1641 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1).getReg());
1643 for (
unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1645 MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1649 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), Parts);
1650 MI.eraseFromParent();
1653 case TargetOpcode::G_ADD:
1654 case TargetOpcode::G_SUB:
1655 case TargetOpcode::G_SADDO:
1656 case TargetOpcode::G_SSUBO:
1657 case TargetOpcode::G_SADDE:
1658 case TargetOpcode::G_SSUBE:
1659 case TargetOpcode::G_UADDO:
1660 case TargetOpcode::G_USUBO:
1661 case TargetOpcode::G_UADDE:
1662 case TargetOpcode::G_USUBE:
1664 case TargetOpcode::G_MUL:
1665 case TargetOpcode::G_UMULH:
1667 case TargetOpcode::G_EXTRACT:
1669 case TargetOpcode::G_INSERT:
1671 case TargetOpcode::G_LOAD: {
1673 Register DstReg = LoadMI.getDstReg();
1674 LLT DstTy = MRI.getType(DstReg);
1678 if (8 * LoadMI.getMemSize().getValue() != DstTy.
getSizeInBits()) {
1679 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1680 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1682 LoadMI.eraseFromParent();
1688 case TargetOpcode::G_ZEXTLOAD:
1689 case TargetOpcode::G_SEXTLOAD:
1690 case TargetOpcode::G_FPEXTLOAD: {
1692 Register DstReg = LoadMI.getDstReg();
1693 Register PtrReg = LoadMI.getPointerReg();
1695 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1696 auto &MMO = LoadMI.getMMO();
1699 if (MemSize == NarrowSize) {
1701 }
else if (MemSize < NarrowSize) {
1702 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1703 }
else if (MemSize > NarrowSize) {
1715 LoadMI.eraseFromParent();
1718 case TargetOpcode::G_STORE: {
1721 Register SrcReg = StoreMI.getValueReg();
1722 LLT SrcTy = MRI.getType(SrcReg);
1723 if (SrcTy.isVector())
1726 int NumParts = SizeOp0 / NarrowSize;
1728 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1729 if (SrcTy.isVector() && LeftoverBits != 0)
1732 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1733 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1735 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1736 StoreMI.eraseFromParent();
1742 case TargetOpcode::G_FPTRUNCSTORE: {
1744 Register SrcReg = StoreMI.getValueReg();
1745 Register PtrReg = StoreMI.getPointerReg();
1747 auto &MMO = StoreMI.getMMO();
1749 if (MemSize > NarrowSize) {
1753 auto TmpReg =
MIRBuilder.buildFPTrunc(NarrowTy, SrcReg);
1754 if (MemSize == NarrowSize) {
1756 }
else if (MemSize < NarrowSize) {
1757 MIRBuilder.buildStoreInstr(TargetOpcode::G_FPTRUNCSTORE, TmpReg, PtrReg,
1761 StoreMI.eraseFromParent();
1764 case TargetOpcode::G_SELECT:
1766 case TargetOpcode::G_AND:
1767 case TargetOpcode::G_OR:
1768 case TargetOpcode::G_XOR: {
1780 case TargetOpcode::G_SHL:
1781 case TargetOpcode::G_LSHR:
1782 case TargetOpcode::G_ASHR:
1784 case TargetOpcode::G_CTLZ:
1785 case TargetOpcode::G_CTLZ_ZERO_POISON:
1786 case TargetOpcode::G_CTTZ:
1787 case TargetOpcode::G_CTTZ_ZERO_POISON:
1788 case TargetOpcode::G_CTLS:
1789 case TargetOpcode::G_CTPOP:
1791 switch (
MI.getOpcode()) {
1792 case TargetOpcode::G_CTLZ:
1793 case TargetOpcode::G_CTLZ_ZERO_POISON:
1795 case TargetOpcode::G_CTTZ:
1796 case TargetOpcode::G_CTTZ_ZERO_POISON:
1798 case TargetOpcode::G_CTPOP:
1800 case TargetOpcode::G_CTLS:
1810 case TargetOpcode::G_INTTOPTR:
1818 case TargetOpcode::G_PTRTOINT:
1826 case TargetOpcode::G_PHI: {
1829 if (SizeOp0 % NarrowSize != 0)
1832 unsigned NumParts = SizeOp0 / NarrowSize;
1836 for (
unsigned i = 1; i <
MI.getNumOperands(); i += 2) {
1844 for (
unsigned i = 0; i < NumParts; ++i) {
1845 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1847 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1848 for (
unsigned j = 1; j <
MI.getNumOperands(); j += 2)
1849 MIB.
addUse(SrcRegs[j / 2][i]).
add(
MI.getOperand(j + 1));
1852 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
1854 MI.eraseFromParent();
1857 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1858 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1862 int OpIdx =
MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1868 case TargetOpcode::G_ICMP: {
1870 LLT SrcTy = MRI.getType(LHS);
1876 if (!
extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1882 if (!
extractParts(
MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1883 RHSPartRegs, RHSLeftoverRegs,
MIRBuilder, MRI))
1889 LLT ResTy = MRI.getType(Dst);
1894 auto Zero =
MIRBuilder.buildConstant(NarrowTy, 0);
1896 for (
auto LHSAndRHS :
zip(LHSPartRegs, RHSPartRegs)) {
1897 auto LHS = std::get<0>(LHSAndRHS);
1898 auto RHS = std::get<1>(LHSAndRHS);
1899 auto Xor =
MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1906 for (
auto LHSAndRHS :
zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1907 auto LHS = std::get<0>(LHSAndRHS);
1908 auto RHS = std::get<1>(LHSAndRHS);
1909 auto Xor =
MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1910 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy,
Xor);
1911 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1912 TargetOpcode::G_ZEXT);
1919 assert(Xors.
size() >= 2 &&
"Should have gotten at least two Xors?");
1920 auto Or =
MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1921 for (
unsigned I = 2, E = Xors.
size();
I < E; ++
I)
1926 for (
unsigned I = 0, E = LHSPartRegs.
size();
I != E; ++
I) {
1930 if (
I == E - 1 && LHSLeftoverRegs.
empty()) {
1935 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1939 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[
I],
1942 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[
I],
1945 LHSPartRegs[
I], RHSPartRegs[
I]);
1946 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1952 for (
unsigned I = 0, E = LHSLeftoverRegs.
size();
I != E; ++
I) {
1961 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1965 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[
I],
1966 RHSLeftoverRegs[
I]);
1968 auto Cmp =
MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[
I],
1969 RHSLeftoverRegs[
I]);
1972 LHSLeftoverRegs[
I], RHSLeftoverRegs[
I]);
1973 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1979 MI.eraseFromParent();
1982 case TargetOpcode::G_FCMP:
1991 case TargetOpcode::G_SEXT_INREG: {
1995 int64_t SizeInBits =
MI.getOperand(2).getImm();
2004 auto TruncMIB =
MIRBuilder.buildTrunc(NarrowTy, MO1);
2005 MO1.
setReg(TruncMIB.getReg(0));
2008 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
2020 if (SizeOp0 % NarrowSize != 0)
2022 int NumParts = SizeOp0 / NarrowSize;
2030 for (
int i = 0; i < NumParts; ++i) {
2031 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2046 for (
int i = 0; i < NumParts; ++i) {
2049 PartialExtensionReg = DstRegs.
back();
2051 assert(PartialExtensionReg &&
2052 "Expected to visit partial extension before full");
2053 if (FullExtensionReg) {
2058 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2060 FullExtensionReg = DstRegs.
back();
2065 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2068 PartialExtensionReg = DstRegs.
back();
2074 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2075 MI.eraseFromParent();
2078 case TargetOpcode::G_BSWAP:
2079 case TargetOpcode::G_BITREVERSE: {
2080 if (SizeOp0 % NarrowSize != 0)
2085 unsigned NumParts = SizeOp0 / NarrowSize;
2086 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2089 for (
unsigned i = 0; i < NumParts; ++i) {
2090 auto DstPart =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
2091 {SrcRegs[NumParts - 1 - i]});
2095 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), DstRegs);
2098 MI.eraseFromParent();
2101 case TargetOpcode::G_PTR_ADD:
2102 case TargetOpcode::G_PTRMASK: {
2110 case TargetOpcode::G_FPTOUI:
2111 case TargetOpcode::G_FPTOSI:
2112 case TargetOpcode::G_FPTOUI_SAT:
2113 case TargetOpcode::G_FPTOSI_SAT:
2115 case TargetOpcode::G_FPEXT:
2122 case TargetOpcode::G_FLDEXP:
2123 case TargetOpcode::G_STRICT_FLDEXP:
2125 case TargetOpcode::G_VSCALE: {
2127 LLT Ty = MRI.getType(Dst);
2131 auto VScaleBase =
MIRBuilder.buildVScale(NarrowTy, One);
2132 auto ZExt =
MIRBuilder.buildZExt(Ty, VScaleBase);
2133 auto C =
MIRBuilder.buildConstant(Ty, *
MI.getOperand(1).getCImm());
2136 MI.eraseFromParent();
2143 LLT Ty = MRI.getType(Val);
2149 if (Ty.isPointer()) {
2150 if (
DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2152 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2158 if (Ty.isPointerVector())
2159 NewVal =
MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2160 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2164 unsigned OpIdx,
unsigned ExtOpcode) {
2166 auto ExtB =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2167 MO.
setReg(ExtB.getReg(0));
2173 auto ExtB =
MIRBuilder.buildTrunc(NarrowTy, MO);
2174 MO.
setReg(ExtB.getReg(0));
2178 unsigned OpIdx,
unsigned TruncOpcode) {
2180 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2182 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2187 unsigned OpIdx,
unsigned ExtOpcode) {
2189 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2191 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2200 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2202 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2208 MO.
setReg(
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2218 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2225LegalizerHelper::widenScalarMergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2230 auto [DstReg, DstTy, Src1Reg, Src1Ty] =
MI.getFirst2RegLLTs();
2231 if (DstTy.isVector())
2236 const int SrcSize = SrcTy.getSizeInBits();
2238 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2240 unsigned NumOps =
MI.getNumOperands();
2241 unsigned NumSrc =
MI.getNumOperands() - 1;
2242 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2244 if (WideSize >= DstSize) {
2248 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
2249 const unsigned Offset = (
I - 1) * PartSize;
2262 ResultReg = NextResult;
2265 if (WideSize > DstSize)
2267 else if (DstTy.isPointer())
2270 MI.eraseFromParent();
2295 const int GCD = std::gcd(SrcSize, WideSize);
2305 if (GCD == SrcSize) {
2308 auto Unmerge =
MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2309 for (
int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2315 if (
static_cast<int>(Unmerges.
size()) != NumMerge * WideSize) {
2317 for (
int I = Unmerges.
size();
I != NumMerge * WideSize; ++
I)
2321 const int PartsPerGCD = WideSize / GCD;
2325 for (
int I = 0;
I != NumMerge; ++
I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2327 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2334 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2336 auto FinalMerge =
MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2337 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2340 MI.eraseFromParent();
2345LegalizerHelper::widenScalarUnmergeValues(
MachineInstr &
MI,
unsigned TypeIdx,
2350 int NumDst =
MI.getNumOperands() - 1;
2351 Register SrcReg =
MI.getOperand(NumDst).getReg();
2352 LLT SrcTy = MRI.getType(SrcReg);
2356 Register Dst0Reg =
MI.getOperand(0).getReg();
2357 LLT DstTy = MRI.getType(Dst0Reg);
2366 dbgs() <<
"Not casting non-integral address space integer\n");
2371 SrcReg =
MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2379 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2387 for (
int I = 1;
I != NumDst; ++
I) {
2388 auto ShiftAmt =
MIRBuilder.buildConstant(SrcTy, DstSize *
I);
2389 auto Shr =
MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2393 MI.eraseFromParent();
2404 LLVM_DEBUG(
dbgs() <<
"Widening pointer source types not implemented\n");
2408 WideSrc =
MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2411 auto Unmerge =
MIRBuilder.buildUnmerge(WideTy, WideSrc);
2429 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2434 if (PartsPerRemerge == 1) {
2437 for (
int I = 0;
I != NumUnmerge; ++
I) {
2438 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2440 for (
int J = 0; J != PartsPerUnmerge; ++J) {
2441 int Idx =
I * PartsPerUnmerge + J;
2443 MIB.addDef(
MI.getOperand(Idx).getReg());
2446 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2450 MIB.addUse(Unmerge.getReg(
I));
2453 SmallVector<Register, 16> Parts;
2454 for (
int J = 0; J != NumUnmerge; ++J)
2455 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2458 for (
int I = 0;
I != NumDst; ++
I) {
2459 for (
int J = 0; J < PartsPerRemerge; ++J) {
2460 const int Idx =
I * PartsPerRemerge + J;
2464 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(
I).getReg(), RemergeParts);
2465 RemergeParts.
clear();
2469 MI.eraseFromParent();
2474LegalizerHelper::widenScalarExtract(
MachineInstr &
MI,
unsigned TypeIdx,
2476 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
2477 unsigned Offset =
MI.getOperand(2).getImm();
2480 if (SrcTy.
isVector() || DstTy.isVector())
2492 Src =
MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2496 if (DstTy.isPointer())
2503 MI.eraseFromParent();
2508 LLT ShiftTy = SrcTy;
2517 MI.eraseFromParent();
2548LegalizerHelper::widenScalarInsert(
MachineInstr &
MI,
unsigned TypeIdx,
2550 if (TypeIdx != 0 || WideTy.
isVector())
2560LegalizerHelper::widenScalarAddSubOverflow(
MachineInstr &
MI,
unsigned TypeIdx,
2564 std::optional<Register> CarryIn;
2565 switch (
MI.getOpcode()) {
2568 case TargetOpcode::G_SADDO:
2569 Opcode = TargetOpcode::G_ADD;
2570 ExtOpcode = TargetOpcode::G_SEXT;
2572 case TargetOpcode::G_SSUBO:
2573 Opcode = TargetOpcode::G_SUB;
2574 ExtOpcode = TargetOpcode::G_SEXT;
2576 case TargetOpcode::G_UADDO:
2577 Opcode = TargetOpcode::G_ADD;
2578 ExtOpcode = TargetOpcode::G_ZEXT;
2580 case TargetOpcode::G_USUBO:
2581 Opcode = TargetOpcode::G_SUB;
2582 ExtOpcode = TargetOpcode::G_ZEXT;
2584 case TargetOpcode::G_SADDE:
2585 Opcode = TargetOpcode::G_UADDE;
2586 ExtOpcode = TargetOpcode::G_SEXT;
2587 CarryIn =
MI.getOperand(4).getReg();
2589 case TargetOpcode::G_SSUBE:
2590 Opcode = TargetOpcode::G_USUBE;
2591 ExtOpcode = TargetOpcode::G_SEXT;
2592 CarryIn =
MI.getOperand(4).getReg();
2594 case TargetOpcode::G_UADDE:
2595 Opcode = TargetOpcode::G_UADDE;
2596 ExtOpcode = TargetOpcode::G_ZEXT;
2597 CarryIn =
MI.getOperand(4).getReg();
2599 case TargetOpcode::G_USUBE:
2600 Opcode = TargetOpcode::G_USUBE;
2601 ExtOpcode = TargetOpcode::G_ZEXT;
2602 CarryIn =
MI.getOperand(4).getReg();
2618 auto LHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(2)});
2619 auto RHSExt =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {
MI.getOperand(3)});
2623 LLT CarryOutTy = MRI.getType(
MI.getOperand(1).getReg());
2625 .buildInstr(Opcode, {WideTy, CarryOutTy},
2626 {LHSExt, RHSExt, *CarryIn})
2629 NewOp =
MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).
getReg(0);
2631 LLT OrigTy = MRI.getType(
MI.getOperand(0).getReg());
2632 auto TruncOp =
MIRBuilder.buildTrunc(OrigTy, NewOp);
2633 auto ExtOp =
MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2638 MI.eraseFromParent();
2643LegalizerHelper::widenScalarAddSubShlSat(
MachineInstr &
MI,
unsigned TypeIdx,
2645 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2646 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2647 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2648 bool IsShift =
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2649 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2662 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2669 auto ShiftK =
MIRBuilder.buildConstant(WideTy, SHLAmount);
2673 auto WideInst =
MIRBuilder.buildInstr(
MI.getOpcode(), {WideTy},
2674 {ShiftL, ShiftR},
MI.getFlags());
2679 :
MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2682 MI.eraseFromParent();
2687LegalizerHelper::widenScalarMulo(
MachineInstr &
MI,
unsigned TypeIdx,
2696 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULO;
2698 LLT SrcTy = MRI.getType(
LHS);
2699 LLT OverflowTy = MRI.getType(OriginalOverflow);
2706 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2707 auto LeftOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
LHS});
2708 auto RightOperand =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
RHS});
2715 WideMulCanOverflow ?
MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2717 MachineInstrBuilder Mulo;
2718 if (WideMulCanOverflow)
2719 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2720 {LeftOperand, RightOperand});
2722 Mulo =
MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2727 MachineInstrBuilder ExtResult;
2734 ExtResult =
MIRBuilder.buildSExtInReg(WideTy,
Mul, SrcBitWidth);
2738 ExtResult =
MIRBuilder.buildZExtInReg(WideTy,
Mul, SrcBitWidth);
2741 if (WideMulCanOverflow) {
2749 MI.eraseFromParent();
2755 unsigned Opcode =
MI.getOpcode();
2759 case TargetOpcode::G_ATOMICRMW_XCHG:
2760 case TargetOpcode::G_ATOMICRMW_ADD:
2761 case TargetOpcode::G_ATOMICRMW_SUB:
2762 case TargetOpcode::G_ATOMICRMW_AND:
2763 case TargetOpcode::G_ATOMICRMW_OR:
2764 case TargetOpcode::G_ATOMICRMW_XOR:
2765 case TargetOpcode::G_ATOMICRMW_MIN:
2766 case TargetOpcode::G_ATOMICRMW_MAX:
2767 case TargetOpcode::G_ATOMICRMW_UMIN:
2768 case TargetOpcode::G_ATOMICRMW_UMAX:
2769 assert(TypeIdx == 0 &&
"atomicrmw with second scalar type");
2775 case TargetOpcode::G_ATOMIC_CMPXCHG:
2776 assert(TypeIdx == 0 &&
"G_ATOMIC_CMPXCHG with second scalar type");
2783 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2793 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2798 case TargetOpcode::G_EXTRACT:
2799 return widenScalarExtract(
MI, TypeIdx, WideTy);
2800 case TargetOpcode::G_INSERT:
2801 return widenScalarInsert(
MI, TypeIdx, WideTy);
2802 case TargetOpcode::G_MERGE_VALUES:
2803 return widenScalarMergeValues(
MI, TypeIdx, WideTy);
2804 case TargetOpcode::G_UNMERGE_VALUES:
2805 return widenScalarUnmergeValues(
MI, TypeIdx, WideTy);
2806 case TargetOpcode::G_SADDO:
2807 case TargetOpcode::G_SSUBO:
2808 case TargetOpcode::G_UADDO:
2809 case TargetOpcode::G_USUBO:
2810 case TargetOpcode::G_SADDE:
2811 case TargetOpcode::G_SSUBE:
2812 case TargetOpcode::G_UADDE:
2813 case TargetOpcode::G_USUBE:
2814 return widenScalarAddSubOverflow(
MI, TypeIdx, WideTy);
2815 case TargetOpcode::G_UMULO:
2816 case TargetOpcode::G_SMULO:
2817 return widenScalarMulo(
MI, TypeIdx, WideTy);
2818 case TargetOpcode::G_SADDSAT:
2819 case TargetOpcode::G_SSUBSAT:
2820 case TargetOpcode::G_SSHLSAT:
2821 case TargetOpcode::G_UADDSAT:
2822 case TargetOpcode::G_USUBSAT:
2823 case TargetOpcode::G_USHLSAT:
2824 return widenScalarAddSubShlSat(
MI, TypeIdx, WideTy);
2825 case TargetOpcode::G_CTTZ:
2826 case TargetOpcode::G_CTTZ_ZERO_POISON:
2827 case TargetOpcode::G_CTLZ:
2828 case TargetOpcode::G_CTLZ_ZERO_POISON:
2829 case TargetOpcode::G_CTLS:
2830 case TargetOpcode::G_CTPOP: {
2843 case TargetOpcode::G_CTTZ:
2844 case TargetOpcode::G_CTTZ_ZERO_POISON:
2845 case TargetOpcode::G_CTLZ_ZERO_POISON:
2846 ExtOpc = TargetOpcode::G_ANYEXT;
2848 case TargetOpcode::G_CTLS:
2849 ExtOpc = TargetOpcode::G_SEXT;
2852 ExtOpc = TargetOpcode::G_ZEXT;
2855 auto MIBSrc =
MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2856 LLT CurTy = MRI.getType(SrcReg);
2857 unsigned NewOpc = Opcode;
2858 if (NewOpc == TargetOpcode::G_CTTZ) {
2865 WideTy, MIBSrc,
MIRBuilder.buildConstant(WideTy, TopBit));
2867 NewOpc = TargetOpcode::G_CTTZ_ZERO_POISON;
2873 if (Opcode == TargetOpcode::G_CTLZ_ZERO_POISON) {
2883 auto MIBNewOp =
MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2885 if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
2890 WideTy, MIBNewOp,
MIRBuilder.buildConstant(WideTy, SizeDiff),
2891 Opcode == TargetOpcode::G_CTLZ
2896 MIRBuilder.buildZExtOrTrunc(
MI.getOperand(0), MIBNewOp);
2897 MI.eraseFromParent();
2900 case TargetOpcode::G_BSWAP: {
2904 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2905 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2906 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2909 MI.getOperand(0).setReg(DstExt);
2913 LLT Ty = MRI.getType(DstReg);
2915 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2916 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2922 case TargetOpcode::G_BITREVERSE: {
2926 LLT Ty = MRI.getType(DstReg);
2929 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2931 MI.getOperand(0).setReg(DstExt);
2934 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, DiffBits);
2935 auto Shift =
MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2940 case TargetOpcode::G_FREEZE:
2941 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2948 case TargetOpcode::G_ABS:
2955 case TargetOpcode::G_ADD:
2956 case TargetOpcode::G_AND:
2957 case TargetOpcode::G_MUL:
2958 case TargetOpcode::G_OR:
2959 case TargetOpcode::G_XOR:
2960 case TargetOpcode::G_SUB:
2961 case TargetOpcode::G_SHUFFLE_VECTOR:
2972 case TargetOpcode::G_SBFX:
2973 case TargetOpcode::G_UBFX:
2987 case TargetOpcode::G_SHL:
3003 case TargetOpcode::G_ROTR:
3004 case TargetOpcode::G_ROTL:
3013 case TargetOpcode::G_SDIV:
3014 case TargetOpcode::G_SREM:
3015 case TargetOpcode::G_SMIN:
3016 case TargetOpcode::G_SMAX:
3017 case TargetOpcode::G_ABDS:
3025 case TargetOpcode::G_SDIVREM:
3035 case TargetOpcode::G_ASHR:
3036 case TargetOpcode::G_LSHR:
3040 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
3041 : TargetOpcode::G_ZEXT;
3054 case TargetOpcode::G_UDIV:
3055 case TargetOpcode::G_UREM:
3056 case TargetOpcode::G_ABDU:
3063 case TargetOpcode::G_UDIVREM:
3072 case TargetOpcode::G_UMIN:
3073 case TargetOpcode::G_UMAX: {
3074 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3076 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3080 ? TargetOpcode::G_SEXT
3081 : TargetOpcode::G_ZEXT;
3091 case TargetOpcode::G_SELECT:
3101 bool IsVec = MRI.getType(
MI.getOperand(1).getReg()).isVector();
3108 case TargetOpcode::G_FPEXT:
3116 case TargetOpcode::G_FPTOSI:
3117 case TargetOpcode::G_FPTOUI:
3118 case TargetOpcode::G_INTRINSIC_LRINT:
3119 case TargetOpcode::G_INTRINSIC_LLRINT:
3120 case TargetOpcode::G_IS_FPCLASS:
3130 case TargetOpcode::G_SITOFP:
3140 case TargetOpcode::G_UITOFP:
3150 case TargetOpcode::G_FPTOSI_SAT:
3151 case TargetOpcode::G_FPTOUI_SAT:
3156 LLT Ty = MRI.getType(OldDst);
3157 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3159 MI.getOperand(0).setReg(ExtReg);
3160 uint64_t ShortBits = Ty.getScalarSizeInBits();
3163 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3174 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3175 NewDst =
MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3183 NewDst =
MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3191 case TargetOpcode::G_LOAD:
3192 case TargetOpcode::G_SEXTLOAD:
3193 case TargetOpcode::G_ZEXTLOAD:
3194 case TargetOpcode::G_FPEXTLOAD:
3200 case TargetOpcode::G_STORE: {
3204 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
3205 assert(!Ty.isPointerOrPointerVector() &&
"Can't widen type");
3206 if (!Ty.isScalar()) {
3214 MI.setMemRefs(MF, {NewMMO});
3221 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3222 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3228 case TargetOpcode::G_FPTRUNCSTORE:
3235 case TargetOpcode::G_CONSTANT: {
3238 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3239 MRI.getType(
MI.getOperand(0).getReg()));
3240 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3241 ExtOpc == TargetOpcode::G_ANYEXT) &&
3244 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3248 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3254 case TargetOpcode::G_FCONSTANT: {
3260 auto IntCst =
MIRBuilder.buildConstant(
MI.getOperand(0).getReg(), Val);
3262 MI.eraseFromParent();
3265 case TargetOpcode::G_IMPLICIT_DEF: {
3271 case TargetOpcode::G_BRCOND:
3277 case TargetOpcode::G_FCMP:
3288 case TargetOpcode::G_ICMP:
3293 LLT SrcTy = MRI.getType(
MI.getOperand(2).getReg());
3297 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
3298 unsigned ExtOpcode =
3302 ? TargetOpcode::G_SEXT
3303 : TargetOpcode::G_ZEXT;
3310 case TargetOpcode::G_PTR_ADD:
3311 assert(TypeIdx == 1 &&
"unable to legalize pointer of G_PTR_ADD");
3317 case TargetOpcode::G_PHI: {
3318 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
3321 for (
unsigned I = 1;
I <
MI.getNumOperands();
I += 2) {
3333 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3336 LLT VecTy = MRI.getType(VecReg);
3340 TargetOpcode::G_ANYEXT);
3354 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3370 LLT VecTy = MRI.getType(VecReg);
3389 case TargetOpcode::G_FADD:
3390 case TargetOpcode::G_FMUL:
3391 case TargetOpcode::G_FSUB:
3392 case TargetOpcode::G_FMA:
3393 case TargetOpcode::G_FMAD:
3394 case TargetOpcode::G_FNEG:
3395 case TargetOpcode::G_FABS:
3396 case TargetOpcode::G_FCANONICALIZE:
3397 case TargetOpcode::G_FMINNUM:
3398 case TargetOpcode::G_FMAXNUM:
3399 case TargetOpcode::G_FMINNUM_IEEE:
3400 case TargetOpcode::G_FMAXNUM_IEEE:
3401 case TargetOpcode::G_FMINIMUM:
3402 case TargetOpcode::G_FMAXIMUM:
3403 case TargetOpcode::G_FMINIMUMNUM:
3404 case TargetOpcode::G_FMAXIMUMNUM:
3405 case TargetOpcode::G_FDIV:
3406 case TargetOpcode::G_FREM:
3407 case TargetOpcode::G_FCEIL:
3408 case TargetOpcode::G_FFLOOR:
3409 case TargetOpcode::G_FCOS:
3410 case TargetOpcode::G_FSIN:
3411 case TargetOpcode::G_FTAN:
3412 case TargetOpcode::G_FACOS:
3413 case TargetOpcode::G_FASIN:
3414 case TargetOpcode::G_FATAN:
3415 case TargetOpcode::G_FATAN2:
3416 case TargetOpcode::G_FCOSH:
3417 case TargetOpcode::G_FSINH:
3418 case TargetOpcode::G_FTANH:
3419 case TargetOpcode::G_FLOG10:
3420 case TargetOpcode::G_FLOG:
3421 case TargetOpcode::G_FLOG2:
3422 case TargetOpcode::G_FRINT:
3423 case TargetOpcode::G_FNEARBYINT:
3424 case TargetOpcode::G_FSQRT:
3425 case TargetOpcode::G_FEXP:
3426 case TargetOpcode::G_FEXP2:
3427 case TargetOpcode::G_FEXP10:
3428 case TargetOpcode::G_FPOW:
3429 case TargetOpcode::G_INTRINSIC_TRUNC:
3430 case TargetOpcode::G_INTRINSIC_ROUND:
3431 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3435 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3441 case TargetOpcode::G_FMODF: {
3451 case TargetOpcode::G_FPOWI:
3452 case TargetOpcode::G_FLDEXP:
3453 case TargetOpcode::G_STRICT_FLDEXP: {
3455 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3476 case TargetOpcode::G_FFREXP: {
3489 case TargetOpcode::G_LROUND:
3490 case TargetOpcode::G_LLROUND:
3501 case TargetOpcode::G_INTTOPTR:
3509 case TargetOpcode::G_PTRTOINT:
3517 case TargetOpcode::G_BUILD_VECTOR: {
3521 for (
int I = 1, E =
MI.getNumOperands();
I != E; ++
I)
3527 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3535 case TargetOpcode::G_SEXT_INREG:
3544 case TargetOpcode::G_PTRMASK: {
3552 case TargetOpcode::G_VECREDUCE_ADD: {
3561 case TargetOpcode::G_VECREDUCE_FADD:
3562 case TargetOpcode::G_VECREDUCE_FMUL:
3563 case TargetOpcode::G_VECREDUCE_FMIN:
3564 case TargetOpcode::G_VECREDUCE_FMAX:
3565 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3566 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3571 LLT VecTy = MRI.getType(VecReg);
3578 case TargetOpcode::G_VSCALE: {
3585 SrcMO.
setCImm(ConstantInt::get(Ctx, Val));
3590 case TargetOpcode::G_SPLAT_VECTOR: {
3599 case TargetOpcode::G_INSERT_SUBVECTOR: {
3607 LLT SubVecTy = MRI.getType(SubVec);
3611 auto BigZExt =
MIRBuilder.buildZExt(WideTy, BigVec);
3612 auto SubZExt =
MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3613 auto WideInsert =
MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3617 auto SplatZero =
MIRBuilder.buildSplatVector(
3622 MI.eraseFromParent();
3631 auto Unmerge =
B.buildUnmerge(Ty, Src);
3632 for (
int I = 0,
E = Unmerge->getNumOperands() - 1;
I !=
E; ++
I)
3641 unsigned AddrSpace =
DL.getDefaultGlobalsAddressSpace();
3655 MIRBuilder.
buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3664 MI.eraseFromParent();
3675 MI.eraseFromParent();
3682 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
3683 if (SrcTy.isVector()) {
3687 if (DstTy.isVector()) {
3688 int NumDstElt = DstTy.getNumElements();
3689 int NumSrcElt = SrcTy.getNumElements();
3692 LLT DstCastTy = DstEltTy;
3693 LLT SrcPartTy = SrcEltTy;
3697 if (NumSrcElt < NumDstElt) {
3708 SrcPartTy = SrcEltTy;
3709 }
else if (NumSrcElt > NumDstElt) {
3721 DstCastTy = DstEltTy;
3726 SrcReg =
MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3730 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3731 MI.eraseFromParent();
3735 if (DstTy.isVector()) {
3738 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3739 MI.eraseFromParent();
3755 unsigned NewEltSize,
3756 unsigned OldEltSize) {
3757 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3758 LLT IdxTy =
B.getMRI()->getType(Idx);
3761 auto OffsetMask =
B.buildConstant(
3763 auto OffsetIdx =
B.buildAnd(IdxTy, Idx, OffsetMask);
3764 return B.buildShl(IdxTy, OffsetIdx,
3765 B.buildConstant(IdxTy,
Log2_32(OldEltSize))).getReg(0);
3780 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] =
MI.getFirst3RegLLTs();
3784 unsigned OldNumElts = SrcVecTy.getNumElements();
3791 if (NewNumElts > OldNumElts) {
3802 if (NewNumElts % OldNumElts != 0)
3806 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3810 auto NewEltsPerOldEltK =
MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3813 auto NewBaseIdx =
MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3815 for (
unsigned I = 0;
I < NewEltsPerOldElt; ++
I) {
3816 auto IdxOffset =
MIRBuilder.buildConstant(IdxTy,
I);
3817 auto TmpIdx =
MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3818 auto Elt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3819 NewOps[
I] = Elt.getReg(0);
3822 auto NewVec =
MIRBuilder.buildBuildVector(MidTy, NewOps);
3824 MI.eraseFromParent();
3828 if (NewNumElts < OldNumElts) {
3829 if (NewEltSize % OldEltSize != 0)
3851 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3852 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3855 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3859 WideElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3860 ScaledIdx).getReg(0);
3868 auto ExtractedBits =
MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3870 MI.eraseFromParent();
3884 LLT TargetTy =
B.getMRI()->getType(TargetReg);
3885 LLT InsertTy =
B.getMRI()->getType(InsertReg);
3886 auto ZextVal =
B.buildZExt(TargetTy, InsertReg);
3887 auto ShiftedInsertVal =
B.buildShl(TargetTy, ZextVal, OffsetBits);
3890 auto EltMask =
B.buildConstant(
3894 auto ShiftedMask =
B.buildShl(TargetTy, EltMask, OffsetBits);
3895 auto InvShiftedMask =
B.buildNot(TargetTy, ShiftedMask);
3898 auto MaskedOldElt =
B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3902 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3916 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3917 MI.getFirst4RegLLTs();
3929 if (NewNumElts < OldNumElts) {
3930 if (NewEltSize % OldEltSize != 0)
3939 const unsigned Log2EltRatio =
Log2_32(NewEltSize / OldEltSize);
3940 auto Log2Ratio =
MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3943 auto ScaledIdx =
MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3947 ExtractedElt =
MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3948 ScaledIdx).getReg(0);
3958 InsertedElt =
MIRBuilder.buildInsertVectorElement(
3959 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3963 MI.eraseFromParent();
3993 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
3997 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3998 return UnableToLegalize;
4003 for (
unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
4005 MIRBuilder.
buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
4014 MI.eraseFromParent();
4032 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
4033 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
4043 auto Inp1 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
4044 auto Inp2 =
MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
4046 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
4047 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
4049 MI.eraseFromParent();
4079 LLT DstTy = MRI.getType(Dst);
4080 LLT SrcTy = MRI.getType(Src);
4086 if (DstTy == CastTy)
4094 if (CastEltSize < DstEltSize)
4097 auto AdjustAmt = CastEltSize / DstEltSize;
4098 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4099 SrcTyMinElts % AdjustAmt != 0)
4104 auto CastVec =
MIRBuilder.buildBitcast(SrcTy, Src);
4105 auto PromotedES =
MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4108 ES->eraseFromParent();
4143 LLT DstTy = MRI.getType(Dst);
4144 LLT BigVecTy = MRI.getType(BigVec);
4145 LLT SubVecTy = MRI.getType(SubVec);
4147 if (DstTy == CastTy)
4162 if (CastEltSize < DstEltSize)
4165 auto AdjustAmt = CastEltSize / DstEltSize;
4166 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4167 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4173 auto CastBigVec =
MIRBuilder.buildBitcast(BigVecTy, BigVec);
4174 auto CastSubVec =
MIRBuilder.buildBitcast(SubVecTy, SubVec);
4176 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4179 ES->eraseFromParent();
4187 LLT DstTy = MRI.getType(DstReg);
4197 if (MemSizeInBits != MemStoreSizeInBits) {
4214 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4218 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4219 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4221 auto NewLoad =
MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4224 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4226 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4229 if (DstTy != LoadTy)
4237 if (
MIRBuilder.getDataLayout().isBigEndian())
4255 uint64_t LargeSplitSize, SmallSplitSize;
4260 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4267 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4270 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4281 if (Alignment.
value() * 8 > MemSizeInBits &&
4286 auto NewLoad =
MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4303 LLT PtrTy = MRI.getType(PtrReg);
4316 auto LargeLoad =
MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4319 auto OffsetCst =
MIRBuilder.buildConstant(OffsetCstRes, LargeSplitSize / 8);
4320 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4321 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4323 SmallPtr, *SmallMMO);
4325 auto ShiftAmt =
MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4326 auto Shift =
MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4328 if (AnyExtTy == DstTy)
4329 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4331 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4335 auto Or =
MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4355 LLT SrcTy = MRI.getType(SrcReg);
4363 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4369 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4371 SrcReg =
MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4375 auto ZextInReg =
MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4379 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4394 uint64_t LargeSplitSize, SmallSplitSize;
4401 if (TLI.allowsMemoryAccess(Ctx,
MIRBuilder.getDataLayout(), MemTy, MMO))
4404 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4413 if (SrcTy.isPointer()) {
4415 SrcReg =
MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4418 auto ExtVal =
MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4421 auto ShiftAmt =
MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4422 auto SmallVal =
MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4425 LLT PtrTy = MRI.getType(PtrReg);
4427 LargeSplitSize / 8);
4428 auto SmallPtr =
MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4434 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4435 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4444 LLT SrcTy = MRI.getType(SrcReg);
4450 assert(SrcTy.isVector() &&
"Expect a vector store type");
4457 auto CurrVal =
MIRBuilder.buildConstant(IntTy, 0);
4461 auto Elt =
MIRBuilder.buildExtractVectorElement(
4462 SrcTy.getElementType(), SrcReg,
MIRBuilder.buildConstant(IdxTy,
I));
4463 auto Trunc =
MIRBuilder.buildTrunc(MemScalarTy, Elt);
4464 auto ZExt =
MIRBuilder.buildZExt(IntTy, Trunc);
4470 auto Shifted =
MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4471 CurrVal =
MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4475 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4486 switch (
MI.getOpcode()) {
4487 case TargetOpcode::G_LOAD: {
4505 case TargetOpcode::G_STORE: {
4521 case TargetOpcode::G_SELECT: {
4525 if (MRI.getType(
MI.getOperand(1).getReg()).isVector()) {
4527 dbgs() <<
"bitcast action not implemented for vector select\n");
4538 case TargetOpcode::G_AND:
4539 case TargetOpcode::G_OR:
4540 case TargetOpcode::G_XOR: {
4548 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4550 case TargetOpcode::G_INSERT_VECTOR_ELT:
4552 case TargetOpcode::G_CONCAT_VECTORS:
4554 case TargetOpcode::G_SHUFFLE_VECTOR:
4556 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4558 case TargetOpcode::G_INSERT_SUBVECTOR:
4566void LegalizerHelper::changeOpcode(
MachineInstr &
MI,
unsigned NewOpcode) {
4576 switch(
MI.getOpcode()) {
4579 case TargetOpcode::G_FCONSTANT:
4581 case TargetOpcode::G_BITCAST:
4583 case TargetOpcode::G_SREM:
4584 case TargetOpcode::G_UREM: {
4585 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4587 MIRBuilder.buildInstr(
MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4588 {MI.getOperand(1), MI.getOperand(2)});
4590 auto Prod =
MIRBuilder.buildMul(Ty, Quot,
MI.getOperand(2));
4592 MI.eraseFromParent();
4595 case TargetOpcode::G_SADDO:
4596 case TargetOpcode::G_SSUBO:
4598 case TargetOpcode::G_SADDE:
4600 case TargetOpcode::G_SSUBE:
4602 case TargetOpcode::G_UMULH:
4603 case TargetOpcode::G_SMULH:
4605 case TargetOpcode::G_SMULO:
4606 case TargetOpcode::G_UMULO: {
4609 auto [Res, Overflow, LHS, RHS] =
MI.getFirst4Regs();
4610 LLT Ty = MRI.getType(Res);
4612 unsigned Opcode =
MI.getOpcode() == TargetOpcode::G_SMULO
4613 ? TargetOpcode::G_SMULH
4614 : TargetOpcode::G_UMULH;
4618 MI.setDesc(
TII.get(TargetOpcode::G_MUL));
4619 MI.removeOperand(1);
4622 auto HiPart =
MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4630 if (Opcode == TargetOpcode::G_SMULH) {
4631 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4632 auto Shifted =
MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4639 case TargetOpcode::G_FNEG: {
4640 auto [Res, ResTy, SubByReg, SubByRegTy] =
MI.getFirst2RegLLTs();
4643 Register CastedSubByReg = SubByReg;
4645 if (!SubByRegTy.getScalarType().isAnyScalar() &&
4646 !SubByRegTy.getScalarType().isInteger()) {
4647 auto BitcastDst = SubByRegTy.changeElementType(
4649 CastedSubByReg =
MIRBuilder.buildBitcast(BitcastDst, SubByReg).getReg(0);
4655 if (ResTy != TyInt) {
4657 MIRBuilder.buildXor(TyInt, CastedSubByReg, SignMask).getReg(0);
4660 MIRBuilder.buildXor(Res, CastedSubByReg, SignMask).getReg(0);
4662 MI.eraseFromParent();
4665 case TargetOpcode::G_FSUB:
4666 case TargetOpcode::G_STRICT_FSUB: {
4667 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
4668 LLT Ty = MRI.getType(Res);
4673 if (
MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4674 MIRBuilder.buildStrictFAdd(Res, LHS, Neg,
MI.getFlags());
4678 MI.eraseFromParent();
4681 case TargetOpcode::G_FMAD:
4683 case TargetOpcode::G_FFLOOR:
4685 case TargetOpcode::G_LROUND:
4686 case TargetOpcode::G_LLROUND: {
4689 LLT SrcTy = MRI.getType(SrcReg);
4690 auto Round =
MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4693 MI.eraseFromParent();
4696 case TargetOpcode::G_INTRINSIC_ROUND:
4698 case TargetOpcode::G_FRINT: {
4701 changeOpcode(
MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4704 case TargetOpcode::G_INTRINSIC_LRINT:
4705 case TargetOpcode::G_INTRINSIC_LLRINT: {
4708 LLT SrcTy = MRI.getType(SrcReg);
4710 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4712 MI.eraseFromParent();
4715 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4716 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] =
MI.getFirst5Regs();
4717 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4718 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4719 **
MI.memoperands_begin());
4721 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4722 MI.eraseFromParent();
4725 case TargetOpcode::G_LOAD:
4726 case TargetOpcode::G_SEXTLOAD:
4727 case TargetOpcode::G_ZEXTLOAD:
4729 case TargetOpcode::G_STORE:
4731 case TargetOpcode::G_CTLZ_ZERO_POISON:
4732 case TargetOpcode::G_CTTZ_ZERO_POISON:
4733 case TargetOpcode::G_CTLZ:
4734 case TargetOpcode::G_CTTZ:
4735 case TargetOpcode::G_CTPOP:
4736 case TargetOpcode::G_CTLS:
4739 auto [Res, CarryOut, LHS, RHS] =
MI.getFirst4Regs();
4741 Register NewRes = MRI.cloneVirtualRegister(Res);
4748 MI.eraseFromParent();
4752 auto [Res, CarryOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
4753 const LLT CondTy = MRI.getType(CarryOut);
4754 const LLT Ty = MRI.getType(Res);
4756 Register NewRes = MRI.cloneVirtualRegister(Res);
4759 auto TmpRes =
MIRBuilder.buildAdd(Ty, LHS, RHS);
4765 auto ZExtCarryIn =
MIRBuilder.buildZExt(Ty, CarryIn);
4766 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4773 auto Carry2 =
MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4778 MI.eraseFromParent();
4782 auto [Res, BorrowOut, LHS, RHS] =
MI.getFirst4Regs();
4787 MI.eraseFromParent();
4791 auto [Res, BorrowOut, LHS, RHS, BorrowIn] =
MI.getFirst5Regs();
4792 const LLT CondTy = MRI.getType(BorrowOut);
4793 const LLT Ty = MRI.getType(Res);
4796 auto TmpRes =
MIRBuilder.buildSub(Ty, LHS, RHS);
4802 auto ZExtBorrowIn =
MIRBuilder.buildZExt(Ty, BorrowIn);
4803 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4810 auto Borrow2 =
MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4811 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4813 MI.eraseFromParent();
4853 case G_MERGE_VALUES:
4855 case G_UNMERGE_VALUES:
4857 case TargetOpcode::G_SEXT_INREG: {
4858 assert(
MI.getOperand(2).isImm() &&
"Expected immediate");
4859 int64_t SizeInBits =
MI.getOperand(2).getImm();
4861 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
4862 LLT DstTy = MRI.getType(DstReg);
4863 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4866 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4867 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4868 MI.eraseFromParent();
4871 case G_EXTRACT_VECTOR_ELT:
4872 case G_INSERT_VECTOR_ELT:
4874 case G_SHUFFLE_VECTOR:
4876 case G_VECTOR_COMPRESS:
4878 case G_DYN_STACKALLOC:
4882 case G_STACKRESTORE:
4892 case G_READ_REGISTER:
4893 case G_WRITE_REGISTER:
4900 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4901 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4907 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4912 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4923 bool IsSigned =
MI.getOpcode() == G_ABDS;
4924 LLT Ty = MRI.getType(
MI.getOperand(0).getReg());
4925 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4926 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4950 case G_MEMCPY_INLINE:
4951 return lowerMemcpyInline(
MI);
4962 case G_ATOMICRMW_SUB: {
4963 auto [Ret, Mem, Val] =
MI.getFirst3Regs();
4964 const LLT ValTy = MRI.getType(Val);
4968 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4969 MI.eraseFromParent();
4995 unsigned AddrSpace =
DL.getAllocaAddrSpace();
4999 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
5005 Align StackTypeAlign =
5012 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
5013 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
5018 LLT IdxTy =
B.getMRI()->getType(IdxReg);
5030 return B.buildAnd(IdxTy, IdxReg,
B.buildConstant(IdxTy, Imm)).getReg(0);
5033 return B.buildUMin(IdxTy, IdxReg,
B.buildConstant(IdxTy, NElts - 1))
5044 "Converting bits to bytes lost precision");
5050 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
5051 unsigned IndexSizeInBits =
DL.getIndexSize(AS) * 8;
5053 if (IdxTy != MRI.getType(Index))
5054 Index =
MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
5059 LLT PtrTy = MRI.getType(VecPtr);
5060 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr,
Mul).getReg(0);
5068 std::initializer_list<unsigned> NonVecOpIndices) {
5069 if (
MI.getNumMemOperands() != 0)
5086 if (!Ty.isVector()) {
5092 if (Ty.getNumElements() != NumElts)
5107 assert(Ty.isVector() &&
"Expected vector type");
5109 int NumParts, NumLeftover;
5110 std::tie(NumParts, NumLeftover) =
5113 assert(NumParts > 0 &&
"Error in getNarrowTypeBreakDown");
5114 for (
int i = 0; i < NumParts; ++i) {
5119 assert(NumLeftover == 1 &&
"expected exactly one leftover");
5128 for (
unsigned i = 0; i <
N; ++i) {
5130 Ops.push_back(
Op.getReg());
5131 else if (
Op.isImm())
5132 Ops.push_back(
Op.getImm());
5133 else if (
Op.isPredicate())
5155 std::initializer_list<unsigned> NonVecOpIndices) {
5157 "Non-compatible opcode or not specified non-vector operands");
5158 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5160 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5161 unsigned NumDefs =
MI.getNumDefs();
5169 for (
unsigned i = 0; i < NumDefs; ++i) {
5170 makeDstOps(OutputOpsPieces[i], MRI.getType(
MI.getReg(i)), NumElts);
5178 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5179 ++UseIdx, ++UseNo) {
5182 MI.getOperand(UseIdx));
5191 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5195 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5197 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5198 Defs.
push_back(OutputOpsPieces[DstNo][i]);
5201 for (
unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5202 Uses.push_back(InputOpsPieces[InputNo][i]);
5205 for (
unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5206 OutputRegs[DstNo].push_back(
I.getReg(DstNo));
5211 for (
unsigned i = 0; i < NumDefs; ++i)
5212 mergeMixedSubvectors(
MI.getReg(i), OutputRegs[i]);
5214 for (
unsigned i = 0; i < NumDefs; ++i)
5215 MIRBuilder.buildMergeLikeInstr(
MI.getReg(i), OutputRegs[i]);
5218 MI.eraseFromParent();
5225 unsigned OrigNumElts = MRI.getType(
MI.getReg(0)).getNumElements();
5227 unsigned NumInputs =
MI.getNumOperands() -
MI.getNumDefs();
5228 unsigned NumDefs =
MI.getNumDefs();
5232 makeDstOps(OutputOpsPieces, MRI.getType(
MI.getReg(0)), NumElts);
5237 for (
unsigned UseIdx = NumDefs, UseNo = 0; UseIdx <
MI.getNumOperands();
5238 UseIdx += 2, ++UseNo) {
5246 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5248 for (
unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5249 auto Phi =
MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5251 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5254 for (
unsigned j = 0; j < NumInputs / 2; ++j) {
5255 Phi.addUse(InputOpsPieces[j][i]);
5256 Phi.add(
MI.getOperand(1 + j * 2 + 1));
5266 mergeMixedSubvectors(
MI.getReg(0), OutputRegs);
5268 MIRBuilder.buildMergeLikeInstr(
MI.getReg(0), OutputRegs);
5271 MI.eraseFromParent();
5279 const int NumDst =
MI.getNumOperands() - 1;
5280 const Register SrcReg =
MI.getOperand(NumDst).getReg();
5281 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
5282 LLT SrcTy = MRI.getType(SrcReg);
5284 if (TypeIdx != 1 || NarrowTy == DstTy)
5291 assert(SrcTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5294 if ((SrcTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5308 auto Unmerge =
MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5309 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5310 const int PartsPerUnmerge = NumDst / NumUnmerge;
5312 for (
int I = 0;
I != NumUnmerge; ++
I) {
5313 auto MIB =
MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5315 for (
int J = 0; J != PartsPerUnmerge; ++J)
5316 MIB.addDef(
MI.getOperand(
I * PartsPerUnmerge + J).getReg());
5317 MIB.addUse(Unmerge.getReg(
I));
5320 MI.eraseFromParent();
5327 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5331 assert(DstTy.isVector() && NarrowTy.
isVector() &&
"Expected vector types");
5333 if (NarrowTy == SrcTy)
5341 assert(SrcTy.isVector() &&
"Expected vector types");
5343 if ((DstTy.getSizeInBits() % NarrowTy.
getSizeInBits() != 0) ||
5357 for (
unsigned i = 1; i <
MI.getNumOperands(); ++i) {
5358 auto Unmerge =
MIRBuilder.buildUnmerge(EltTy,
MI.getOperand(i).getReg());
5359 for (
unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5365 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5366 for (
unsigned i = 0,
Offset = 0; i < NumNarrowTyPieces;
5367 ++i,
Offset += NumNarrowTyElts) {
5370 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5373 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5374 MI.eraseFromParent();
5378 assert(TypeIdx == 0 &&
"Bad type index");
5379 if ((NarrowTy.
getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5394 unsigned NumParts = DstTy.getNumElements() / NarrowTy.
getNumElements();
5395 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5397 for (
unsigned i = 0; i < NumParts; ++i) {
5399 for (
unsigned j = 0; j < NumElts; ++j)
5400 Sources.
push_back(
MI.getOperand(1 + i * NumElts + j).getReg());
5402 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5405 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5406 MI.eraseFromParent();
5414 auto [DstReg, SrcVec] =
MI.getFirst2Regs();
5416 bool IsInsert =
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5418 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) &&
"not a vector type index");
5420 InsertVal =
MI.getOperand(2).getReg();
5422 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
5423 LLT VecTy = MRI.getType(SrcVec);
5429 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5433 MI.eraseFromParent();
5442 SplitPieces[IdxVal] = InsertVal;
5443 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0).getReg(), SplitPieces);
5445 MIRBuilder.buildCopy(
MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5449 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5452 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5453 TargetOpcode::G_ANYEXT);
5457 LLT IdxTy = MRI.getType(Idx);
5458 int64_t PartIdx = IdxVal / NewNumElts;
5460 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5463 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5466 auto InsertPart =
MIRBuilder.buildInsertVectorElement(
5467 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5468 VecParts[PartIdx] = InsertPart.getReg(0);
5472 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5474 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5478 MI.eraseFromParent();
5498 LLVM_DEBUG(
dbgs() <<
"Can't narrow load/store to non-byte-sized type\n");
5510 LLT ValTy = MRI.getType(ValReg);
5519 int NumLeftover = -1;
5525 if (
extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5527 NumParts = NarrowRegs.
size();
5528 NumLeftover = NarrowLeftoverRegs.
size();
5535 LLT PtrTy = MRI.getType(AddrReg);
5545 auto MMO = LdStMI.
getMMO();
5547 unsigned NumParts,
unsigned Offset) ->
unsigned {
5550 for (
unsigned Idx = 0, E = NumParts; Idx != E &&
Offset < TotalSize;
5552 unsigned ByteOffset =
Offset / 8;
5555 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5562 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5563 ValRegs.push_back(Dst);
5564 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5566 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5575 unsigned HandledOffset =
5576 splitTypePieces(NarrowTy, NarrowRegs, NumParts,
Offset);
5580 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5583 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5584 LeftoverTy, NarrowLeftoverRegs);
5598 switch (
MI.getOpcode()) {
5599 case G_IMPLICIT_DEF:
5615 case G_FCANONICALIZE:
5632 case G_INTRINSIC_LRINT:
5633 case G_INTRINSIC_LLRINT:
5634 case G_INTRINSIC_ROUND:
5635 case G_INTRINSIC_ROUNDEVEN:
5638 case G_INTRINSIC_TRUNC:
5666 case G_FMINNUM_IEEE:
5667 case G_FMAXNUM_IEEE:
5689 case G_CTLZ_ZERO_POISON:
5691 case G_CTTZ_ZERO_POISON:
5708 case G_ADDRSPACE_CAST:
5721 case G_STRICT_FLDEXP:
5723 case G_TRUNC_SSAT_S:
5724 case G_TRUNC_SSAT_U:
5725 case G_TRUNC_USAT_U:
5733 if (MRI.getType(
MI.getOperand(1).getReg()).isVector())
5738 case G_UNMERGE_VALUES:
5740 case G_BUILD_VECTOR:
5741 assert(TypeIdx == 0 &&
"not a vector type index");
5743 case G_CONCAT_VECTORS:
5747 case G_EXTRACT_VECTOR_ELT:
5748 case G_INSERT_VECTOR_ELT:
5757 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5758 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5760 case G_SHUFFLE_VECTOR:
5766 case G_INTRINSIC_FPTRUNC_ROUND:
5776 assert(
MI.getOpcode() == TargetOpcode::G_BITCAST &&
5777 "Not a bitcast operation");
5782 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
5784 unsigned NewElemCount =
5787 if (NewElemCount == 1) {
5790 auto Unmerge =
MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5797 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5806 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5807 MI.eraseFromParent();
5813 assert(
MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5817 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5818 MI.getFirst3RegLLTs();
5821 if (DstTy != Src1Ty)
5823 if (DstTy != Src2Ty)
5838 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5854 unsigned InputUsed[2] = {-1U, -1U};
5855 unsigned FirstMaskIdx =
High * NewElts;
5856 bool UseBuildVector =
false;
5857 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5859 int Idx = Mask[FirstMaskIdx + MaskOffset];
5864 if (
Input >= std::size(Inputs)) {
5871 Idx -=
Input * NewElts;
5875 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5876 if (InputUsed[OpNo] ==
Input) {
5879 }
else if (InputUsed[OpNo] == -1U) {
5881 InputUsed[OpNo] =
Input;
5886 if (OpNo >= std::size(InputUsed)) {
5889 UseBuildVector =
true;
5894 Ops.push_back(Idx + OpNo * NewElts);
5897 if (UseBuildVector) {
5902 for (
unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5904 int Idx = Mask[FirstMaskIdx + MaskOffset];
5909 if (
Input >= std::size(Inputs)) {
5916 Idx -=
Input * NewElts;
5920 .buildExtractVectorElement(
5921 EltTy, Inputs[
Input],
5927 Output =
MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5928 }
else if (InputUsed[0] == -1U) {
5930 Output =
MIRBuilder.buildUndef(NarrowTy).getReg(0);
5931 }
else if (NewElts == 1) {
5932 Output =
MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5934 Register Op0 = Inputs[InputUsed[0]];
5938 : Inputs[InputUsed[1]];
5940 Output =
MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1,
Ops).getReg(0);
5947 MI.eraseFromParent();
5960 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5966 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5969 const unsigned NumParts =
5971 : SrcTy.getNumElements();
5975 if (DstTy != NarrowTy)
5981 unsigned NumPartsLeft = NumParts;
5982 while (NumPartsLeft > 1) {
5983 for (
unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5986 .buildInstr(ScalarOpc, {NarrowTy},
5987 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5990 SplitSrcs = PartialResults;
5991 PartialResults.
clear();
5992 NumPartsLeft = SplitSrcs.
size();
5996 MI.eraseFromParent();
6001 for (
unsigned Idx = 1; Idx < NumParts; ++Idx)
6002 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
6005 MI.eraseFromParent();
6009 for (
unsigned Part = 0; Part < NumParts; ++Part) {
6011 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
6019 return tryNarrowPow2Reduction(
MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
6022 Register Acc = PartialReductions[0];
6023 for (
unsigned Part = 1; Part < NumParts; ++Part) {
6024 if (Part == NumParts - 1) {
6026 {Acc, PartialReductions[Part]});
6029 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
6033 MI.eraseFromParent();
6039 unsigned int TypeIdx,
6041 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
6042 MI.getFirst3RegLLTs();
6043 if (!NarrowTy.
isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
6047 assert((
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
6048 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
6049 "Unexpected vecreduce opcode");
6050 unsigned ScalarOpc =
MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
6051 ? TargetOpcode::G_FADD
6052 : TargetOpcode::G_FMUL;
6055 unsigned NumParts = SrcTy.getNumElements();
6058 for (
unsigned i = 0; i < NumParts; i++)
6059 Acc =
MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
6063 MI.eraseFromParent();
6070 unsigned ScalarOpc) {
6078 while (SplitSrcs.
size() > 1) {
6080 for (
unsigned Idx = 0; Idx < SplitSrcs.
size()-1; Idx += 2) {
6088 SplitSrcs = std::move(PartialRdxs);
6092 MI.getOperand(1).setReg(SplitSrcs[0]);
6099 const LLT HalfTy,
const LLT AmtTy) {
6101 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6102 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6106 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {InL, InH});
6107 MI.eraseFromParent();
6113 unsigned VTBits = 2 * NVTBits;
6116 if (
MI.getOpcode() == TargetOpcode::G_SHL) {
6117 if (Amt.
ugt(VTBits)) {
6119 }
else if (Amt.
ugt(NVTBits)) {
6122 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6123 }
else if (Amt == NVTBits) {
6131 NVT, InL,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6134 }
else if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6135 if (Amt.
ugt(VTBits)) {
6137 }
else if (Amt.
ugt(NVTBits)) {
6139 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6141 }
else if (Amt == NVTBits) {
6145 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6147 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6149 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6155 if (Amt.
ugt(VTBits)) {
6157 NVT, InH,
MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6158 }
else if (Amt.
ugt(NVTBits)) {
6160 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6162 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6163 }
else if (Amt == NVTBits) {
6166 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6168 auto ShiftAmtConst =
MIRBuilder.buildConstant(AmtTy, Amt);
6170 auto OrLHS =
MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6172 NVT, InH,
MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6179 MIRBuilder.buildMergeLikeInstr(
MI.getOperand(0), {Lo, Hi});
6180 MI.eraseFromParent();
6196 LLT DstTy = MRI.getType(DstReg);
6201 LLT ShiftAmtTy = MRI.getType(Amt);
6203 if (DstEltSize % 2 != 0)
6219 const unsigned NumParts = DstEltSize / RequestedTy.
getSizeInBits();
6230 const unsigned NewBitSize = DstEltSize / 2;
6242 auto NewBits =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6244 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6245 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6248 auto AmtExcess =
MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6249 auto AmtLack =
MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
6251 auto Zero =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6256 switch (
MI.getOpcode()) {
6257 case TargetOpcode::G_SHL: {
6259 auto LoS =
MIRBuilder.buildShl(HalfTy, InL, Amt);
6261 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6262 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, Amt);
6263 auto HiS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6266 auto LoL =
MIRBuilder.buildConstant(HalfTy, 0);
6267 auto HiL =
MIRBuilder.buildShl(HalfTy, InL, AmtExcess);
6269 auto Lo =
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6271 HalfTy, IsZero, InH,
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6273 ResultRegs[0] =
Lo.getReg(0);
6274 ResultRegs[1] =
Hi.getReg(0);
6277 case TargetOpcode::G_LSHR:
6278 case TargetOpcode::G_ASHR: {
6280 auto HiS =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy}, {InH, Amt});
6282 auto LoOr =
MIRBuilder.buildLShr(HalfTy, InL, Amt);
6283 auto HiOr =
MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6284 auto LoS =
MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6288 if (
MI.getOpcode() == TargetOpcode::G_LSHR) {
6291 auto ShiftAmt =
MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6292 HiL =
MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);
6294 auto LoL =
MIRBuilder.buildInstr(
MI.getOpcode(), {HalfTy},
6298 HalfTy, IsZero, InL,
MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6300 auto Hi =
MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6302 ResultRegs[0] =
Lo.getReg(0);
6303 ResultRegs[1] =
Hi.getReg(0);
6310 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6311 MI.eraseFromParent();
6320 LLT TargetTy,
LLT ShiftAmtTy) {
6323 assert(WordShiftConst && BitShiftConst &&
"Expected constants");
6325 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6326 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6327 const bool NeedsInterWordShift = ShiftBits != 0;
6330 case TargetOpcode::G_SHL: {
6333 if (PartIdx < ShiftWords)
6336 unsigned SrcIdx = PartIdx - ShiftWords;
6337 if (!NeedsInterWordShift)
6338 return SrcParts[SrcIdx];
6343 auto Lo =
MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6347 return Hi.getReg(0);
6350 case TargetOpcode::G_LSHR: {
6351 unsigned SrcIdx = PartIdx + ShiftWords;
6352 if (SrcIdx >= NumParts)
6354 if (!NeedsInterWordShift)
6355 return SrcParts[SrcIdx];
6359 if (SrcIdx + 1 < NumParts) {
6360 auto Hi =
MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6364 return Lo.getReg(0);
6367 case TargetOpcode::G_ASHR: {
6369 unsigned SrcIdx = PartIdx + ShiftWords;
6370 if (SrcIdx >= NumParts)
6372 if (!NeedsInterWordShift)
6373 return SrcParts[SrcIdx];
6378 (SrcIdx == NumParts - 1)
6382 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.
SignBit;
6404 unsigned MainOpcode = (Opcode == TargetOpcode::G_ASHR)
6405 ?
static_cast<unsigned>(TargetOpcode::G_LSHR)
6410 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6419 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6420 auto ZeroConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6422 auto IsZeroBitShift =
6430 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6431 : TargetOpcode::G_SHL;
6434 auto TargetBitsConst =
6436 auto InvShiftAmt =
MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6441 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6446 auto ZeroReg =
MIRBuilder.buildConstant(TargetTy, 0);
6448 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6452 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6465 LLT DstTy = MRI.getType(DstReg);
6469 const unsigned NumParts = DstBits / TargetBits;
6471 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6481 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6482 MI.eraseFromParent();
6487 const unsigned ShiftWords = Amt.
getZExtValue() / TargetBits;
6488 const unsigned ShiftBits = Amt.
getZExtValue() % TargetBits;
6494 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6498 if (
MI.getOpcode() == TargetOpcode::G_ASHR)
6501 .buildAShr(TargetTy, SrcParts[SrcParts.
size() - 1],
6502 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6506 for (
unsigned I = 0;
I < NumParts; ++
I)
6508 Params, TargetTy, ShiftAmtTy);
6510 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6511 MI.eraseFromParent();
6520 LLT DstTy = MRI.getType(DstReg);
6521 LLT ShiftAmtTy = MRI.getType(AmtReg);
6525 const unsigned NumParts = DstBits / TargetBits;
6527 assert(DstBits % TargetBits == 0 &&
"Target type must evenly divide source");
6544 auto ZeroAmtConst =
MIRBuilder.buildConstant(ShiftAmtTy, 0);
6556 unsigned TargetBitsLog2 =
Log2_32(TargetBits);
6557 auto TargetBitsLog2Const =
6558 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6559 auto TargetBitsMask =
MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6562 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6564 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6572 if (
MI.getOpcode() == TargetOpcode::G_ASHR) {
6573 auto TargetBitsMinusOneConst =
6574 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6576 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6577 TargetBitsMinusOneConst)
6580 FillValue = ZeroReg;
6588 for (
unsigned I = 0;
I < NumParts; ++
I) {
6590 Register InBoundsResult = FillValue;
6600 for (
unsigned K = 0; K < NumParts; ++K) {
6601 auto WordShiftKConst =
MIRBuilder.buildConstant(ShiftAmtTy, K);
6603 WordShift, WordShiftKConst);
6615 switch (
MI.getOpcode()) {
6616 case TargetOpcode::G_SHL:
6617 MainSrcIdx = (int)
I - (
int)K;
6618 CarrySrcIdx = MainSrcIdx - 1;
6620 case TargetOpcode::G_LSHR:
6621 case TargetOpcode::G_ASHR:
6622 MainSrcIdx = (int)
I + (
int)K;
6623 CarrySrcIdx = MainSrcIdx + 1;
6631 if (MainSrcIdx >= 0 && MainSrcIdx < (
int)NumParts) {
6632 Register MainOp = SrcParts[MainSrcIdx];
6636 if (CarrySrcIdx >= 0 && CarrySrcIdx < (
int)NumParts)
6637 CarryOp = SrcParts[CarrySrcIdx];
6638 else if (
MI.getOpcode() == TargetOpcode::G_ASHR &&
6639 CarrySrcIdx >= (
int)NumParts)
6640 CarryOp = FillValue;
6646 ResultForK = FillValue;
6652 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6659 .buildSelect(TargetTy, IsZeroShift, SrcParts[
I], InBoundsResult)
6663 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6664 MI.eraseFromParent();
6671 assert(TypeIdx == 0 &&
"Expecting only Idx 0");
6674 for (
unsigned I = 1, E =
MI.getNumOperands();
I != E;
I += 2) {
6689 assert(Ty.isScalar() &&
"Expected scalar type to make neutral element for");
6694 "getNeutralElementForVecReduce called with invalid opcode!");
6695 case TargetOpcode::G_VECREDUCE_ADD:
6696 case TargetOpcode::G_VECREDUCE_OR:
6697 case TargetOpcode::G_VECREDUCE_XOR:
6698 case TargetOpcode::G_VECREDUCE_UMAX:
6700 case TargetOpcode::G_VECREDUCE_MUL:
6702 case TargetOpcode::G_VECREDUCE_AND:
6703 case TargetOpcode::G_VECREDUCE_UMIN:
6706 case TargetOpcode::G_VECREDUCE_SMAX:
6709 case TargetOpcode::G_VECREDUCE_SMIN:
6712 case TargetOpcode::G_VECREDUCE_FADD:
6714 case TargetOpcode::G_VECREDUCE_FMUL:
6716 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6717 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6718 assert(
false &&
"getNeutralElementForVecReduce unimplemented for "
6719 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6727 unsigned Opc =
MI.getOpcode();
6729 case TargetOpcode::G_IMPLICIT_DEF:
6730 case TargetOpcode::G_LOAD: {
6738 case TargetOpcode::G_STORE:
6745 case TargetOpcode::G_AND:
6746 case TargetOpcode::G_OR:
6747 case TargetOpcode::G_XOR:
6748 case TargetOpcode::G_ADD:
6749 case TargetOpcode::G_SUB:
6750 case TargetOpcode::G_MUL:
6751 case TargetOpcode::G_FADD:
6752 case TargetOpcode::G_FSUB:
6753 case TargetOpcode::G_FMUL:
6754 case TargetOpcode::G_FDIV:
6755 case TargetOpcode::G_FCOPYSIGN:
6756 case TargetOpcode::G_UADDSAT:
6757 case TargetOpcode::G_USUBSAT:
6758 case TargetOpcode::G_SADDSAT:
6759 case TargetOpcode::G_SSUBSAT:
6760 case TargetOpcode::G_SMIN:
6761 case TargetOpcode::G_SMAX:
6762 case TargetOpcode::G_UMIN:
6763 case TargetOpcode::G_UMAX:
6764 case TargetOpcode::G_FMINNUM:
6765 case TargetOpcode::G_FMAXNUM:
6766 case TargetOpcode::G_FMINNUM_IEEE:
6767 case TargetOpcode::G_FMAXNUM_IEEE:
6768 case TargetOpcode::G_FMINIMUM:
6769 case TargetOpcode::G_FMAXIMUM:
6770 case TargetOpcode::G_FMINIMUMNUM:
6771 case TargetOpcode::G_FMAXIMUMNUM:
6772 case TargetOpcode::G_STRICT_FADD:
6773 case TargetOpcode::G_STRICT_FSUB:
6774 case TargetOpcode::G_STRICT_FMUL: {
6782 case TargetOpcode::G_SHL:
6783 case TargetOpcode::G_ASHR:
6784 case TargetOpcode::G_LSHR: {
6790 MRI.getType(
MI.getOperand(2).getReg()).getElementType());
6796 case TargetOpcode::G_FMA:
6797 case TargetOpcode::G_STRICT_FMA:
6798 case TargetOpcode::G_FSHR:
6799 case TargetOpcode::G_FSHL: {
6808 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6809 case TargetOpcode::G_EXTRACT:
6816 case TargetOpcode::G_INSERT:
6817 case TargetOpcode::G_INSERT_VECTOR_ELT:
6818 case TargetOpcode::G_FREEZE:
6819 case TargetOpcode::G_FNEG:
6820 case TargetOpcode::G_FABS:
6821 case TargetOpcode::G_FSQRT:
6822 case TargetOpcode::G_FCEIL:
6823 case TargetOpcode::G_FFLOOR:
6824 case TargetOpcode::G_FNEARBYINT:
6825 case TargetOpcode::G_FRINT:
6826 case TargetOpcode::G_INTRINSIC_ROUND:
6827 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6828 case TargetOpcode::G_INTRINSIC_TRUNC:
6829 case TargetOpcode::G_BITREVERSE:
6830 case TargetOpcode::G_BSWAP:
6831 case TargetOpcode::G_FCANONICALIZE:
6832 case TargetOpcode::G_SEXT_INREG:
6833 case TargetOpcode::G_ABS:
6834 case TargetOpcode::G_CTLZ:
6835 case TargetOpcode::G_CTPOP:
6843 case TargetOpcode::G_SELECT: {
6844 auto [DstReg, DstTy, CondReg, CondTy] =
MI.getFirst2RegLLTs();
6846 if (!CondTy.isScalar() ||
6852 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6854 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6859 if (CondTy.isVector())
6869 case TargetOpcode::G_UNMERGE_VALUES:
6871 case TargetOpcode::G_PHI:
6873 case TargetOpcode::G_SHUFFLE_VECTOR:
6875 case TargetOpcode::G_BUILD_VECTOR: {
6877 for (
auto Op :
MI.uses()) {
6885 MIRBuilder.buildDeleteTrailingVectorElements(
6886 MI.getOperand(0).getReg(),
MIRBuilder.buildInstr(
Opc, {MoreTy}, Elts));
6887 MI.eraseFromParent();
6890 case TargetOpcode::G_SEXT:
6891 case TargetOpcode::G_ZEXT:
6892 case TargetOpcode::G_ANYEXT:
6893 case TargetOpcode::G_TRUNC:
6894 case TargetOpcode::G_FPTRUNC:
6895 case TargetOpcode::G_FPEXT:
6896 case TargetOpcode::G_FPTOSI:
6897 case TargetOpcode::G_FPTOUI:
6898 case TargetOpcode::G_FPTOSI_SAT:
6899 case TargetOpcode::G_FPTOUI_SAT:
6900 case TargetOpcode::G_SITOFP:
6901 case TargetOpcode::G_UITOFP: {
6908 MRI.getType(
MI.getOperand(1).getReg()).getElementType());
6911 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6919 case TargetOpcode::G_ICMP:
6920 case TargetOpcode::G_FCMP: {
6928 MRI.getType(
MI.getOperand(0).getReg()).getElementType());
6933 case TargetOpcode::G_BITCAST: {
6937 LLT SrcTy = MRI.getType(
MI.getOperand(1).getReg());
6938 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
6954 case TargetOpcode::G_VECREDUCE_FADD:
6955 case TargetOpcode::G_VECREDUCE_FMUL:
6956 case TargetOpcode::G_VECREDUCE_ADD:
6957 case TargetOpcode::G_VECREDUCE_MUL:
6958 case TargetOpcode::G_VECREDUCE_AND:
6959 case TargetOpcode::G_VECREDUCE_OR:
6960 case TargetOpcode::G_VECREDUCE_XOR:
6961 case TargetOpcode::G_VECREDUCE_SMAX:
6962 case TargetOpcode::G_VECREDUCE_SMIN:
6963 case TargetOpcode::G_VECREDUCE_UMAX:
6964 case TargetOpcode::G_VECREDUCE_UMIN: {
6965 LLT OrigTy = MRI.getType(
MI.getOperand(1).getReg());
6967 auto NewVec =
MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6968 auto NeutralElement = getNeutralElementForVecReduce(
6974 auto Idx =
MIRBuilder.buildConstant(IdxTy, i);
6975 NewVec =
MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6976 NeutralElement, Idx);
6980 MO.
setReg(NewVec.getReg(0));
6992 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
6994 unsigned MaskNumElts = Mask.size();
6995 unsigned SrcNumElts = SrcTy.getNumElements();
6998 if (MaskNumElts == SrcNumElts)
7001 if (MaskNumElts < SrcNumElts) {
7009 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7010 MI.getOperand(1).getReg(),
7011 MI.getOperand(2).getReg(), NewMask);
7012 MI.eraseFromParent();
7017 unsigned PaddedMaskNumElts =
alignTo(MaskNumElts, SrcNumElts);
7018 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
7027 MOps1[0] =
MI.getOperand(1).getReg();
7028 MOps2[0] =
MI.getOperand(2).getReg();
7030 auto Src1 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
7031 auto Src2 =
MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
7035 for (
unsigned I = 0;
I != MaskNumElts; ++
I) {
7037 if (Idx >=
static_cast<int>(SrcNumElts))
7038 Idx += PaddedMaskNumElts - SrcNumElts;
7043 if (MaskNumElts != PaddedMaskNumElts) {
7045 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
7048 for (
unsigned I = 0;
I < MaskNumElts; ++
I) {
7050 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle,
I)
7055 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
7058 MI.eraseFromParent();
7064 unsigned int TypeIdx,
LLT MoreTy) {
7065 auto [DstTy, Src1Ty, Src2Ty] =
MI.getFirst3LLTs();
7067 unsigned NumElts = DstTy.getNumElements();
7070 if (DstTy.isVector() && Src1Ty.isVector() &&
7071 DstTy.getNumElements() != Src1Ty.getNumElements()) {
7079 if (DstTy != Src1Ty || DstTy != Src2Ty)
7087 for (
unsigned I = 0;
I != NumElts; ++
I) {
7089 if (Idx <
static_cast<int>(NumElts))
7092 NewMask[
I] = Idx - NumElts + WidenNumElts;
7096 MIRBuilder.buildShuffleVector(
MI.getOperand(0).getReg(),
7097 MI.getOperand(1).getReg(),
7098 MI.getOperand(2).getReg(), NewMask);
7099 MI.eraseFromParent();
7108 unsigned SrcParts = Src1Regs.
size();
7109 unsigned DstParts = DstRegs.
size();
7111 unsigned DstIdx = 0;
7113 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7114 DstRegs[DstIdx] = FactorSum;
7119 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7121 for (
unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7122 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7124 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7130 unsigned LowStart = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7131 unsigned LowEnd = std::min(DstIdx, SrcParts - 1);
7132 for (
unsigned RevI = LowEnd + 1; RevI != LowStart; --RevI) {
7133 unsigned i = RevI - 1;
7135 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7145 if (DstIdx != DstParts - 1) {
7146 MachineInstrBuilder Uaddo =
7147 B.buildUAddo(NarrowTy,
LLT::integer(1), Factors[0], Factors[1]);
7148 FactorSum = Uaddo.
getReg(0);
7149 CarrySum =
B.buildZExt(NarrowTy, Uaddo.
getReg(1)).getReg(0);
7150 for (
unsigned i = 2; i < Factors.
size(); ++i) {
7151 MachineInstrBuilder Uaddo =
7152 B.buildUAddo(NarrowTy,
LLT::integer(1), FactorSum, Factors[i]);
7153 FactorSum = Uaddo.
getReg(0);
7154 MachineInstrBuilder Carry =
B.buildZExt(NarrowTy, Uaddo.
getReg(1));
7155 CarrySum =
B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7159 FactorSum =
B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7160 for (
unsigned i = 2; i < Factors.
size(); ++i)
7161 FactorSum =
B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7164 CarrySumPrevDstIdx = CarrySum;
7165 DstRegs[DstIdx] = FactorSum;
7177 LLT DstType = MRI.getType(DstReg);
7179 if (DstType.isVector())
7182 unsigned Opcode =
MI.getOpcode();
7183 unsigned OpO, OpE, OpF;
7185 case TargetOpcode::G_SADDO:
7186 case TargetOpcode::G_SADDE:
7187 case TargetOpcode::G_UADDO:
7188 case TargetOpcode::G_UADDE:
7189 case TargetOpcode::G_ADD:
7190 OpO = TargetOpcode::G_UADDO;
7191 OpE = TargetOpcode::G_UADDE;
7192 OpF = TargetOpcode::G_UADDE;
7193 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7194 OpF = TargetOpcode::G_SADDE;
7196 case TargetOpcode::G_SSUBO:
7197 case TargetOpcode::G_SSUBE:
7198 case TargetOpcode::G_USUBO:
7199 case TargetOpcode::G_USUBE:
7200 case TargetOpcode::G_SUB:
7201 OpO = TargetOpcode::G_USUBO;
7202 OpE = TargetOpcode::G_USUBE;
7203 OpF = TargetOpcode::G_USUBE;
7204 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7205 OpF = TargetOpcode::G_SSUBE;
7212 unsigned NumDefs =
MI.getNumExplicitDefs();
7213 Register Src1 =
MI.getOperand(NumDefs).getReg();
7214 Register Src2 =
MI.getOperand(NumDefs + 1).getReg();
7217 CarryDst =
MI.getOperand(1).getReg();
7218 if (
MI.getNumOperands() == NumDefs + 3)
7219 CarryIn =
MI.getOperand(NumDefs + 2).getReg();
7221 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7222 LLT LeftoverTy, DummyTy;
7224 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7229 int NarrowParts = Src1Regs.
size();
7230 Src1Regs.
append(Src1Left);
7231 Src2Regs.
append(Src2Left);
7234 for (
int i = 0, e = Src1Regs.
size(); i != e; ++i) {
7236 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7239 if (i == e - 1 && CarryDst)
7240 CarryOut = CarryDst;
7242 CarryOut = MRI.createGenericVirtualRegister(
LLT::integer(1));
7245 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7246 {Src1Regs[i], Src2Regs[i]});
7247 }
else if (i == e - 1) {
7248 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7249 {Src1Regs[i], Src2Regs[i], CarryIn});
7251 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7252 {Src1Regs[i], Src2Regs[i], CarryIn});
7258 insertParts(
MI.getOperand(0).getReg(), RegTy, NarrowTy,
7259 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7260 ArrayRef(DstRegs).drop_front(NarrowParts));
7262 MI.eraseFromParent();
7268 auto [DstReg, Src1, Src2] =
MI.getFirst3Regs();
7270 LLT Ty = MRI.getType(DstReg);
7274 unsigned Size = Ty.getSizeInBits();
7276 if (
Size % NarrowSize != 0)
7279 unsigned NumParts =
Size / NarrowSize;
7280 bool IsMulHigh =
MI.getOpcode() == TargetOpcode::G_UMULH;
7281 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7287 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7291 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7292 MI.eraseFromParent();
7302 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI;
7305 LLT SrcTy = MRI.getType(Src);
7316 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7329 int64_t SizeOp1 = MRI.getType(
MI.getOperand(1).getReg()).getSizeInBits();
7332 if (SizeOp1 % NarrowSize != 0)
7334 int NumParts = SizeOp1 / NarrowSize;
7337 extractParts(
MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7341 uint64_t OpStart =
MI.getOperand(2).getImm();
7342 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7343 for (
int i = 0; i < NumParts; ++i) {
7344 unsigned SrcStart = i * NarrowSize;
7346 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7349 }
else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7357 int64_t ExtractOffset;
7359 if (OpStart < SrcStart) {
7361 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7363 ExtractOffset = OpStart - SrcStart;
7364 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7368 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7370 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7371 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7378 if (MRI.getType(DstReg).isVector())
7379 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7380 else if (DstRegs.
size() > 1)
7381 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7384 MI.eraseFromParent();
7396 LLT RegTy = MRI.getType(
MI.getOperand(0).getReg());
7398 extractParts(
MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7401 SrcRegs.
append(LeftoverRegs);
7405 uint64_t OpStart =
MI.getOperand(3).getImm();
7406 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7407 for (
int I = 0, E = SrcRegs.
size();
I != E; ++
I) {
7408 unsigned DstStart =
I * NarrowSize;
7410 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7418 if (MRI.getType(SrcRegs[
I]) == LeftoverTy) {
7420 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7424 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7432 int64_t ExtractOffset, InsertOffset;
7434 if (OpStart < DstStart) {
7436 ExtractOffset = DstStart - OpStart;
7437 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7439 InsertOffset = OpStart - DstStart;
7442 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7446 if (ExtractOffset != 0 || SegSize != OpSize) {
7448 SegReg = MRI.createGenericVirtualRegister(
LLT::scalar(SegSize));
7449 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7452 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7453 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7461 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7464 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7466 MI.eraseFromParent();
7474 LLT DstTy = MRI.getType(DstReg);
7476 assert(
MI.getNumOperands() == 3 && TypeIdx == 0);
7482 if (!
extractParts(
MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7483 Src0Regs, Src0LeftoverRegs,
MIRBuilder, MRI))
7487 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7488 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7491 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7492 auto Inst =
MIRBuilder.buildInstr(
MI.getOpcode(), {NarrowTy},
7493 {Src0Regs[I], Src1Regs[I]});
7497 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7500 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7501 DstLeftoverRegs.
push_back(Inst.getReg(0));
7504 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7505 LeftoverTy, DstLeftoverRegs);
7507 MI.eraseFromParent();
7517 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
7519 LLT DstTy = MRI.getType(DstReg);
7524 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7525 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
MI.getOpcode());
7526 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7528 MI.eraseFromParent();
7538 Register CondReg =
MI.getOperand(1).getReg();
7539 LLT CondTy = MRI.getType(CondReg);
7540 if (CondTy.isVector())
7544 LLT DstTy = MRI.getType(DstReg);
7550 if (!
extractParts(
MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7551 Src1Regs, Src1LeftoverRegs,
MIRBuilder, MRI))
7555 if (!
extractParts(
MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7556 Src2Regs, Src2LeftoverRegs,
MIRBuilder, MRI))
7559 for (
unsigned I = 0, E = Src1Regs.
size();
I != E; ++
I) {
7561 CondReg, Src1Regs[
I], Src2Regs[
I]);
7565 for (
unsigned I = 0, E = Src1LeftoverRegs.
size();
I != E; ++
I) {
7567 LeftoverTy, CondReg, Src1LeftoverRegs[
I], Src2LeftoverRegs[
I]);
7571 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7572 LeftoverTy, DstLeftoverRegs);
7574 MI.eraseFromParent();
7584 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7587 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7588 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_POISON;
7591 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7593 auto C_0 =
B.buildConstant(NarrowTy, 0);
7595 UnmergeSrc.getReg(1), C_0);
7596 auto LoCTLZ = IsUndef ?
B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0))
7597 :
B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7598 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7599 auto HiIsZeroCTLZ =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7600 auto HiCTLZ =
B.buildCTLZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1));
7601 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7603 MI.eraseFromParent();
7616 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7619 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7620 const bool IsUndef =
MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_POISON;
7623 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7625 auto C_0 =
B.buildConstant(NarrowTy, 0);
7627 UnmergeSrc.getReg(0), C_0);
7628 auto HiCTTZ = IsUndef ?
B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(1))
7629 :
B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7630 auto C_NarrowSize =
B.buildConstant(DstTy, NarrowSize);
7631 auto LoIsZeroCTTZ =
B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7632 auto LoCTTZ =
B.buildCTTZ_ZERO_POISON(DstTy, UnmergeSrc.getReg(0));
7633 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7635 MI.eraseFromParent();
7648 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7651 if (!SrcTy.isScalar() || SrcTy.getSizeInBits() != 2 * NarrowSize)
7656 auto UnmergeSrc =
B.buildUnmerge(NarrowTy, SrcReg);
7660 auto ShAmt =
B.buildConstant(NarrowTy, NarrowSize - 1);
7661 auto Sign =
B.buildAShr(NarrowTy,
Hi, ShAmt);
7669 auto LoInv =
B.buildXor(DstTy,
Lo, Sign);
7670 auto LoCTLZ =
B.buildCTLZ(DstTy, LoInv);
7673 auto C_NarrowSizeM1 =
B.buildConstant(DstTy, NarrowSize - 1);
7674 auto HiIsSignCTLS =
B.buildAdd(DstTy, LoCTLZ, C_NarrowSizeM1);
7676 auto HiCTLS =
B.buildCTLS(DstTy,
Hi);
7678 B.buildSelect(DstReg, HiIsSign, HiIsSignCTLS, HiCTLS);
7680 MI.eraseFromParent();
7690 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7693 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7694 auto UnmergeSrc =
MIRBuilder.buildUnmerge(NarrowTy,
MI.getOperand(1));
7696 auto LoCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7697 auto HiCTPOP =
MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7698 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7700 MI.eraseFromParent();
7715 LLT ExpTy = MRI.getType(ExpReg);
7720 auto MinExp =
B.buildConstant(ExpTy,
minIntN(ClampSize));
7721 auto ClampMin =
B.buildSMax(ExpTy, ExpReg, MinExp);
7722 auto MaxExp =
B.buildConstant(ExpTy,
maxIntN(ClampSize));
7723 auto Clamp =
B.buildSMin(ExpTy, ClampMin, MaxExp);
7725 auto Trunc =
B.buildTrunc(NarrowTy, Clamp);
7727 MI.getOperand(2).setReg(Trunc.getReg(0));
7734 unsigned Opc =
MI.getOpcode();
7737 auto QAction = LI.getAction(Q).Action;
7743 case TargetOpcode::G_CTLZ_ZERO_POISON: {
7746 MI.setDesc(
TII.get(TargetOpcode::G_CTLZ));
7750 case TargetOpcode::G_CTLZ: {
7751 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7752 unsigned Len = SrcTy.getScalarSizeInBits();
7754 if (isSupported({TargetOpcode::G_CTLZ_ZERO_POISON, {DstTy, SrcTy}})) {
7756 auto CtlzZU =
MIRBuilder.buildCTLZ_ZERO_POISON(DstTy, SrcReg);
7757 auto ZeroSrc =
MIRBuilder.buildConstant(SrcTy, 0);
7760 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7761 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7762 MI.eraseFromParent();
7778 for (
unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7779 auto MIBShiftAmt =
MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7782 Op = MIBOp.getReg(0);
7787 MI.eraseFromParent();
7790 case TargetOpcode::G_CTTZ_ZERO_POISON: {
7793 MI.setDesc(
TII.get(TargetOpcode::G_CTTZ));
7797 case TargetOpcode::G_CTTZ: {
7798 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7800 unsigned Len = SrcTy.getScalarSizeInBits();
7801 if (isSupported({TargetOpcode::G_CTTZ_ZERO_POISON, {DstTy, SrcTy}})) {
7804 auto CttzZU =
MIRBuilder.buildCTTZ_ZERO_POISON(DstTy, SrcReg);
7805 auto Zero =
MIRBuilder.buildConstant(SrcTy, 0);
7808 auto LenConst =
MIRBuilder.buildConstant(DstTy, Len);
7809 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7810 MI.eraseFromParent();
7817 auto MIBCstNeg1 =
MIRBuilder.buildConstant(SrcTy, -1);
7818 auto MIBNot =
MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7820 SrcTy, MIBNot,
MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7821 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7822 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7823 auto MIBCstLen =
MIRBuilder.buildConstant(SrcTy, Len);
7826 MI.eraseFromParent();
7830 MI.setDesc(
TII.get(TargetOpcode::G_CTPOP));
7831 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7835 case TargetOpcode::G_CTPOP: {
7837 LLT Ty = MRI.getType(SrcReg);
7838 unsigned Size = Ty.getScalarSizeInBits();
7850 auto C_1 =
B.buildConstant(Ty, 1);
7851 auto B2Set1LoTo1Hi =
B.buildLShr(Ty, SrcReg, C_1);
7853 auto C_B2Mask1HiTo0 =
B.buildConstant(Ty, B2Mask1HiTo0);
7854 auto B2Count1Hi =
B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7855 auto B2Count =
B.buildSub(Ty, SrcReg, B2Count1Hi);
7859 auto C_2 =
B.buildConstant(Ty, 2);
7860 auto B4Set2LoTo2Hi =
B.buildLShr(Ty, B2Count, C_2);
7862 auto C_B4Mask2HiTo0 =
B.buildConstant(Ty, B4Mask2HiTo0);
7863 auto B4HiB2Count =
B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7864 auto B4LoB2Count =
B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7865 auto B4Count =
B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7872 auto C_4 =
B.buildConstant(Ty, 4);
7873 auto B8HiB4Count =
B.buildLShr(Ty, B4Count, C_4);
7874 auto B8CountDirty4Hi =
B.buildAdd(Ty, B8HiB4Count, B4Count);
7876 auto C_B8Mask4HiTo0 =
B.buildConstant(Ty, B8Mask4HiTo0);
7877 auto B8Count =
B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7879 assert(
Size <= 128 &&
"Scalar size is too large for CTPOP lower algorithm");
7882 if (
Size == 16 && !Ty.isVector()) {
7884 auto C_8 =
B.buildConstant(Ty, 8);
7885 auto HighSum =
B.buildLShr(Ty, B8Count, C_8);
7886 auto Res =
B.buildAdd(Ty, B8Count, HighSum);
7887 B.buildAnd(
MI.getOperand(0).getReg(), Res,
B.buildConstant(Ty, 0xFF));
7888 MI.eraseFromParent();
7897 auto C_SizeM8 =
B.buildConstant(Ty,
Size - 8);
7899 auto IsMulSupported = [
this](
const LLT Ty) {
7900 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7903 if (IsMulSupported(Ty)) {
7904 auto ResTmp =
B.buildMul(Ty, B8Count, MulMask);
7905 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7907 auto ResTmp = B8Count;
7908 for (
unsigned Shift = 8; Shift <
Size; Shift *= 2) {
7909 auto ShiftC =
B.buildConstant(Ty, Shift);
7910 auto Shl =
B.buildShl(Ty, ResTmp, ShiftC);
7911 ResTmp =
B.buildAdd(Ty, ResTmp, Shl);
7913 B.buildLShr(
MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7915 MI.eraseFromParent();
7918 case TargetOpcode::G_CTLS: {
7919 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
7923 MIRBuilder.buildConstant(SrcTy, SrcTy.getScalarSizeInBits() - 1);
7924 auto OneC =
MIRBuilder.buildConstant(DstTy, 1);
7926 auto Shr =
MIRBuilder.buildAShr(SrcTy, SrcReg, SignIdxC);
7932 MI.eraseFromParent();
7953 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7954 LLT Ty = MRI.getType(Dst);
7955 LLT ShTy = MRI.getType(Z);
7962 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
7963 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7968 auto Zero =
MIRBuilder.buildConstant(ShTy, 0);
7969 Z =
MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7973 auto One =
MIRBuilder.buildConstant(ShTy, 1);
7986 MI.eraseFromParent();
7992 auto [Dst,
X,
Y, Z] =
MI.getFirst4Regs();
7993 LLT Ty = MRI.getType(Dst);
7994 LLT ShTy = MRI.getType(Z);
7997 const bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8007 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8008 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8009 InvShAmt =
MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
8010 ShX =
MIRBuilder.buildShl(Ty,
X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
8011 ShY =
MIRBuilder.buildLShr(Ty,
Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
8015 auto Mask =
MIRBuilder.buildConstant(ShTy, BW - 1);
8018 ShAmt =
MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
8021 InvShAmt =
MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
8023 auto BitWidthC =
MIRBuilder.buildConstant(ShTy, BW);
8024 ShAmt =
MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
8025 InvShAmt =
MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
8028 auto One =
MIRBuilder.buildConstant(ShTy, 1);
8030 ShX =
MIRBuilder.buildShl(Ty,
X, ShAmt).getReg(0);
8032 ShY =
MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
8035 ShX =
MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
8036 ShY =
MIRBuilder.buildLShr(Ty,
Y, ShAmt).getReg(0);
8041 MI.eraseFromParent();
8052 LLT Ty = MRI.getType(Dst);
8053 LLT ShTy = MRI.getType(
MI.getOperand(3).getReg());
8055 bool IsFSHL =
MI.getOpcode() == TargetOpcode::G_FSHL;
8056 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
8059 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action ==
Lower)
8060 return lowerFunnelShiftAsShifts(
MI);
8064 if (Result == UnableToLegalize)
8065 return lowerFunnelShiftAsShifts(
MI);
8070 auto [Dst, Src] =
MI.getFirst2Regs();
8071 LLT DstTy = MRI.getType(Dst);
8072 LLT SrcTy = MRI.getType(Src);
8076 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
8084 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
8088 auto NewExt =
MIRBuilder.buildInstr(
MI.getOpcode(), {MidTy}, {Src});
8092 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, NewExt);
8097 auto ZExtRes1 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8098 {UnmergeSrc.getReg(0)});
8099 auto ZExtRes2 =
MIRBuilder.buildInstr(
MI.getOpcode(), {ZExtResTy},
8100 {UnmergeSrc.getReg(1)});
8103 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
8105 MI.eraseFromParent();
8122 assert(
MI.getOpcode() == TargetOpcode::G_TRUNC);
8126 LLT DstTy = MRI.getType(DstReg);
8127 LLT SrcTy = MRI.getType(SrcReg);
8135 SrcTy.getElementCount().divideCoefficientBy(2));
8148 Src =
MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
8160 MI.eraseFromParent();
8169 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8170 auto Zero =
MIRBuilder.buildConstant(AmtTy, 0);
8171 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8172 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8173 auto Neg =
MIRBuilder.buildSub(AmtTy, Zero, Amt);
8174 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8175 MI.eraseFromParent();
8180 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] =
MI.getFirst3RegLLTs();
8182 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8183 bool IsLeft =
MI.getOpcode() == TargetOpcode::G_ROTL;
8188 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8189 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8191 return lowerRotateWithReverseRotate(
MI);
8194 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8195 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8196 bool IsFShLegal =
false;
8197 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8198 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8202 MI.eraseFromParent();
8207 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8210 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8215 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8216 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8217 auto BitWidthMinusOneC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits - 1);
8223 auto NegAmt = MIRBuilder.
buildSub(AmtTy, Zero, Amt);
8224 auto ShAmt = MIRBuilder.
buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8226 auto RevAmt = MIRBuilder.
buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8232 auto BitWidthC = MIRBuilder.
buildConstant(AmtTy, EltSizeInBits);
8233 auto ShAmt = MIRBuilder.
buildURem(AmtTy, Amt, BitWidthC);
8235 auto RevAmt = MIRBuilder.
buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8237 auto Inner = MIRBuilder.
buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8242 MI.eraseFromParent();
8250 auto [Dst, Src] =
MI.getFirst2Regs();
8255 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8283 auto Mask1 =
MIRBuilder.buildConstant(
S64, 0xffffffffffULL);
8296 auto Select0 =
MIRBuilder.buildSelect(
S32, TCmp, VTrunc1, Zero32);
8300 MI.eraseFromParent();
8308 auto [Dst, Src] =
MI.getFirst2Regs();
8313 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S32);
8326 auto RoundedHalved =
MIRBuilder.buildOr(
S64, Halved, LowerBit);
8328 auto LargeResult =
MIRBuilder.buildFAdd(
S32, HalvedFP, HalvedFP);
8333 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8335 MI.eraseFromParent();
8343 auto [Dst, Src] =
MI.getFirst2Regs();
8347 assert(MRI.getType(Src) ==
S64 && MRI.getType(Dst) ==
S64);
8358 auto TwoP52 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4330000000000000));
8359 auto TwoP84 =
MIRBuilder.buildConstant(
S64, UINT64_C(0x4530000000000000));
8361 auto TwoP52P84FP =
MIRBuilder.buildFConstant(
S64, TwoP52P84);
8368 auto HighBitsFP =
MIRBuilder.buildOr(
S64, TwoP84, HighBits);
8369 auto Scratch =
MIRBuilder.buildFSub(
S64, HighBitsFP, TwoP52P84FP);
8370 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8372 MI.eraseFromParent();
8383 SrcTy.changeElementType(
LLT::floatIEEE(SrcTy.getScalarSizeInBits()));
8384 auto M1 =
MI.getOpcode() == TargetOpcode::G_UITOFP
8390 MI.eraseFromParent();
8395 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8398 auto True =
MIRBuilder.buildFConstant(DstTy, 1.0);
8399 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8400 MIRBuilder.buildSelect(Dst, Src, True, False);
8401 MI.eraseFromParent();
8405 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8425 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8432 auto True =
MIRBuilder.buildFConstant(DstTy, -1.0);
8433 auto False =
MIRBuilder.buildFConstant(DstTy, 0.0);
8434 MIRBuilder.buildSelect(Dst, Src, True, False);
8435 MI.eraseFromParent();
8439 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8445 if (DstTy.getScalarSizeInBits() == 32) {
8452 auto SignBit =
MIRBuilder.buildConstant(I64, 63);
8453 auto S =
MIRBuilder.buildAShr(I64, L, SignBit);
8455 auto LPlusS =
MIRBuilder.buildAdd(I64, L, S);
8462 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8463 MI.eraseFromParent();
8471 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8475 if (SrcTy !=
S64 && SrcTy !=
S32)
8477 if (DstTy !=
S32 && DstTy !=
S64)
8504 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8506 MI.eraseFromParent();
8511 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8516 if (SrcTy.getScalarType() !=
S32 || DstTy.getScalarType() !=
S64)
8523 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8525 auto ExponentMask =
MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8526 auto ExponentLoBit =
MIRBuilder.buildConstant(SrcTy, 23);
8528 auto AndExpMask =
MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8529 auto ExponentBits =
MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8531 auto SignMask =
MIRBuilder.buildConstant(SrcTy,
8533 auto AndSignMask =
MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8534 auto SignLowBit =
MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8535 auto Sign =
MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8538 auto MantissaMask =
MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8539 auto AndMantissaMask =
MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8540 auto K =
MIRBuilder.buildConstant(SrcTy, 0x00800000);
8542 auto R =
MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8545 auto Bias =
MIRBuilder.buildConstant(SrcTy, 127);
8550 auto Shl =
MIRBuilder.buildShl(DstTy, R, SubExponent);
8551 auto Srl =
MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8557 R =
MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8559 auto XorSign =
MIRBuilder.buildXor(DstTy, R, Sign);
8560 auto Ret =
MIRBuilder.buildSub(DstTy, XorSign, Sign);
8562 auto ZeroSrcTy =
MIRBuilder.buildConstant(SrcTy, 0);
8567 auto ZeroDstTy =
MIRBuilder.buildConstant(DstTy, 0);
8568 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8570 MI.eraseFromParent();
8576 auto [Dst, DstTy, Src, SrcTy] =
MI.getFirst2RegLLTs();
8578 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8579 unsigned SatWidth = DstTy.getScalarSizeInBits();
8583 APInt MinInt, MaxInt;
8606 if (AreExactFloatBounds) {
8608 auto MaxC =
MIRBuilder.buildFConstant(SrcTy, MinFloat);
8611 auto Max =
MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8613 auto MinC =
MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8622 MI.eraseFromParent();
8627 auto FpToInt =
MIRBuilder.buildFPTOSI(DstTy, Min);
8632 MI.eraseFromParent();
8639 auto FpToInt = IsSigned ?
MIRBuilder.buildFPTOSI(DstTy, Src)
8647 DstTy, ULT,
MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8657 MI.eraseFromParent();
8663 DstTy, OGT,
MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8667 MI.eraseFromParent();
8674 assert((
MI.getOpcode() == TargetOpcode::G_FPEXT ||
8675 MI.getOpcode() == TargetOpcode::G_FPTRUNC) &&
8676 "Only G_FPEXT and G_FPTRUNC are expected");
8678 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8683 if (
MI.getOpcode() == TargetOpcode::G_FPEXT) {
8685 StoreOpc = TargetOpcode::G_STORE;
8686 LoadOpc = TargetOpcode::G_FPEXTLOAD;
8689 StoreOpc = TargetOpcode::G_FPTRUNCSTORE;
8690 LoadOpc = TargetOpcode::G_LOAD;
8699 StackTy, StackTyAlign);
8700 MIRBuilder.buildStoreInstr(StoreOpc, SrcReg, StackTemp, *StoreMMO);
8703 StackTy, StackTyAlign);
8704 MIRBuilder.buildLoadInstr(LoadOpc, DstReg, StackTemp, *LoadMMO);
8706 MI.eraseFromParent();
8716 auto [Dst, Src] =
MI.getFirst2Regs();
8720 if (MRI.getType(Src).isVector())
8724 unsigned Flags =
MI.getFlags();
8727 MI.eraseFromParent();
8731 const unsigned ExpMask = 0x7ff;
8732 const unsigned ExpBiasf64 = 1023;
8733 const unsigned ExpBiasf16 = 15;
8762 auto SelectCC =
MIRBuilder.buildSelect(
S32, CmpM_NE0, Bits0x200, Zero);
8822 MI.eraseFromParent();
8829 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
8839 auto SrcI =
MIRBuilder.buildBitcast(I32Ty, SrcReg);
8861 auto Trunc =
MIRBuilder.buildTrunc(I16Ty, Srl);
8863 MI.eraseFromParent();
8869 auto [DstTy, SrcTy] =
MI.getFirst2LLTs();
8870 if (DstTy.getScalarType().isFloat16() && SrcTy.getScalarType().isFloat64())
8873 if (DstTy.getScalarType().isBFloat16() && SrcTy.getScalarType().isFloat32())
8880 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8881 LLT Ty = MRI.getType(Dst);
8883 auto CvtSrc1 =
MIRBuilder.buildSITOFP(Ty, Src1);
8884 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1,
MI.getFlags());
8885 MI.eraseFromParent();
8890 auto [DstFrac, DstInt, Src] =
MI.getFirst3Regs();
8891 LLT Ty = MRI.getType(Src);
8892 auto Flags =
MI.getFlags();
8900 FracToUse = FracPart.getReg(0);
8902 auto Abs =
MIRBuilder.buildFAbs(Ty, Src, Flags);
8906 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
8908 FracToUse =
Select.getReg(0);
8911 MIRBuilder.buildFCopysign(DstFrac, FracToUse, Src, Flags);
8914 MI.eraseFromParent();
8920 case TargetOpcode::G_SMIN:
8922 case TargetOpcode::G_SMAX:
8924 case TargetOpcode::G_UMIN:
8926 case TargetOpcode::G_UMAX:
8934 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
8939 auto Cmp =
MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8940 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8942 MI.eraseFromParent();
8951 LLT DstTy = MRI.getType(Dst);
8952 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8962 auto Zero =
MIRBuilder.buildConstant(DstTy, 0);
8963 auto IsGT =
MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8965 auto IsLT =
MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8968 auto &Ctx =
MIRBuilder.getMF().getFunction().getContext();
8969 auto BC = TLI.getBooleanContents(DstTy.
isVector(),
false);
8970 if (TLI.preferSelectsOverBooleanArithmetic(
8973 auto One =
MIRBuilder.buildConstant(DstTy, 1);
8974 auto SelectZeroOrOne =
MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8976 auto MinusOne =
MIRBuilder.buildConstant(DstTy, -1);
8977 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8983 unsigned BoolExtOp =
8985 IsGT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8986 IsLT =
MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8990 MI.eraseFromParent();
8996 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] =
MI.getFirst3RegLLTs();
8997 const int Src0Size = Src0Ty.getScalarSizeInBits();
8998 const int Src1Size = Src1Ty.getScalarSizeInBits();
9008 if (!(Src0Ty.getScalarType().isAnyScalar() ||
9009 Src0Ty.getScalarType().isInteger()))
9010 Src0Int =
MIRBuilder.buildBitcast(Src0IntTy, Src0).getReg(0);
9012 if (!(Src1Ty.getScalarType().isAnyScalar() ||
9013 Src1Ty.getScalarType().isInteger()))
9014 Src1Int =
MIRBuilder.buildBitcast(Src1IntTy, Src1).getReg(0);
9019 auto NotSignBitMask =
MIRBuilder.buildConstant(
9023 MIRBuilder.buildAnd(Src0IntTy, Src0Int, NotSignBitMask).getReg(0);
9025 if (Src0Ty == Src1Ty) {
9026 And1 =
MIRBuilder.buildAnd(Src1IntTy, Src1Int, SignBitMask).getReg(0);
9027 }
else if (Src0Size > Src1Size) {
9028 auto ShiftAmt =
MIRBuilder.buildConstant(Src0IntTy, Src0Size - Src1Size);
9029 auto Zext =
MIRBuilder.buildZExt(Src0IntTy, Src1Int);
9030 auto Shift =
MIRBuilder.buildShl(Src0IntTy, Zext, ShiftAmt);
9031 And1 =
MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
9033 auto ShiftAmt =
MIRBuilder.buildConstant(Src1IntTy, Src1Size - Src0Size);
9034 auto Shift =
MIRBuilder.buildLShr(Src1IntTy, Src1Int, ShiftAmt);
9035 auto Trunc =
MIRBuilder.buildTrunc(Src0IntTy, Shift);
9036 And1 =
MIRBuilder.buildAnd(Src0IntTy, Trunc, SignBitMask).getReg(0);
9042 unsigned Flags =
MI.getFlags();
9047 if (DstTy == DstIntTy)
9048 MIRBuilder.buildOr(Dst, And0, And1, Flags).getReg(0);
9054 MI.eraseFromParent();
9065 switch (
MI.getOpcode()) {
9066 case TargetOpcode::G_FMINNUM:
9067 NewOp = TargetOpcode::G_FMINNUM_IEEE;
9069 case TargetOpcode::G_FMINIMUMNUM:
9070 NewOp = TargetOpcode::G_FMINNUM;
9072 case TargetOpcode::G_FMAXNUM:
9073 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
9075 case TargetOpcode::G_FMAXIMUMNUM:
9076 NewOp = TargetOpcode::G_FMAXNUM;
9082 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9083 LLT Ty = MRI.getType(Dst);
9092 if (!VT->isKnownNeverSNaN(Src0))
9093 Src0 =
MIRBuilder.buildFCanonicalize(Ty, Src0,
MI.getFlags()).getReg(0);
9095 if (!VT->isKnownNeverSNaN(Src1))
9096 Src1 =
MIRBuilder.buildFCanonicalize(Ty, Src1,
MI.getFlags()).getReg(0);
9101 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1},
MI.getFlags());
9102 MI.eraseFromParent();
9108 unsigned Opc =
MI.getOpcode();
9109 auto [Dst, Src0, Src1] =
MI.getFirst3Regs();
9110 LLT Ty = MRI.getType(Dst);
9113 bool IsMax = (
Opc == TargetOpcode::G_FMAXIMUM);
9115 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
9116 unsigned OpcNonIeee =
9117 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
9118 bool MinMaxMustRespectOrderedZero =
false;
9122 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
9124 MinMaxMustRespectOrderedZero =
true;
9125 }
else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
9130 Res =
MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
9135 (!VT->isKnownNeverNaN(Src0) || !VT->isKnownNeverNaN(Src1))) {
9138 LLT ElementTy = Ty.
isScalar() ? Ty : Ty.getElementType();
9142 NaN =
MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
9144 Res =
MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
9154 const unsigned Flags =
MI.getFlags();
9160 auto LHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
9162 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
9164 auto RHSTestZero =
MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
9166 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
9168 Res =
MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
9173 MI.eraseFromParent();
9180 LLT Ty = MRI.getType(DstReg);
9181 unsigned Flags =
MI.getFlags();
9186 MI.eraseFromParent();
9192 auto [DstReg,
X] =
MI.getFirst2Regs();
9193 const unsigned Flags =
MI.getFlags();
9194 const LLT Ty = MRI.getType(DstReg);
9206 auto AbsDiff =
MIRBuilder.buildFAbs(Ty, Diff, Flags);
9208 auto Half =
MIRBuilder.buildFConstant(Ty, 0.5);
9213 auto One =
MIRBuilder.buildFConstant(Ty, 1.0);
9214 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9215 auto BoolFP =
MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
9216 auto SignedOffset =
MIRBuilder.buildFCopysign(Ty, BoolFP,
X);
9218 MIRBuilder.buildFAdd(DstReg,
T, SignedOffset, Flags);
9220 MI.eraseFromParent();
9225 auto [DstReg, SrcReg] =
MI.getFirst2Regs();
9226 unsigned Flags =
MI.getFlags();
9227 LLT Ty = MRI.getType(DstReg);
9234 auto Trunc =
MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
9235 auto Zero =
MIRBuilder.buildFConstant(Ty, 0.0);
9238 SrcReg, Zero, Flags);
9240 SrcReg, Trunc, Flags);
9244 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
9245 MI.eraseFromParent();
9251 const unsigned NumOps =
MI.getNumOperands();
9252 auto [DstReg, DstTy, Src0Reg, Src0Ty] =
MI.getFirst2RegLLTs();
9253 unsigned PartSize = Src0Ty.getSizeInBits();
9258 for (
unsigned I = 2;
I !=
NumOps; ++
I) {
9259 const unsigned Offset = (
I - 1) * PartSize;
9262 auto ZextInput =
MIRBuilder.buildZExt(WideTy, SrcReg);
9265 MRI.createGenericVirtualRegister(WideTy);
9268 auto Shl =
MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
9269 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
9270 ResultReg = NextResult;
9273 if (DstTy.isPointer()) {
9274 if (
MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
9275 DstTy.getAddressSpace())) {
9283 MI.eraseFromParent();
9289 const unsigned NumDst =
MI.getNumOperands() - 1;
9290 Register SrcReg =
MI.getOperand(NumDst).getReg();
9291 Register Dst0Reg =
MI.getOperand(0).getReg();
9292 LLT DstTy = MRI.getType(Dst0Reg);
9301 LLT IntTy = MRI.getType(SrcReg);
9306 unsigned Offset = DstSize;
9307 for (
unsigned I = 1;
I != NumDst; ++
I,
Offset += DstSize) {
9309 auto Shift =
MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9313 MI.eraseFromParent();
9332 if (
MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9333 InsertVal =
MI.getOperand(2).getReg();
9335 Register Idx =
MI.getOperand(
MI.getNumOperands() - 1).getReg();
9337 LLT VecTy = MRI.getType(SrcVec);
9347 SrcRegs[IdxVal] =
MI.getOperand(2).getReg();
9348 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9350 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9353 MI.eraseFromParent();
9358 LLVM_DEBUG(
dbgs() <<
"Can't handle non-byte element vectors yet\n");
9369 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9376 int64_t
Offset = IdxVal * EltBytes;
9387 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9390 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9392 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9395 MI.eraseFromParent();
9401 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9402 MI.getFirst3RegLLTs();
9412 for (
int Idx : Mask) {
9414 if (!
Undef.isValid())
9420 assert(!Src0Ty.isScalar() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9422 int NumElts = Src0Ty.getNumElements();
9423 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9424 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9425 auto [It, Inserted] = CachedExtract.
try_emplace(Idx);
9427 auto IdxK =
MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9429 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9434 assert(DstTy.isVector() &&
"Unexpected scalar G_SHUFFLE_VECTOR");
9435 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9436 MI.eraseFromParent();
9442 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9443 MI.getFirst4RegLLTs();
9445 if (VecTy.isScalableVector())
9461 auto OutPos =
MIRBuilder.buildConstant(IdxTy, 0);
9464 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9467 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9470 std::optional<APInt> PassthruSplatVal =
9473 if (PassthruSplatVal.has_value()) {
9475 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9476 }
else if (HasPassthru) {
9477 auto Popcount =
MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9478 Popcount =
MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9484 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9488 unsigned NumElmts = VecTy.getNumElements();
9489 for (
unsigned I = 0;
I < NumElmts; ++
I) {
9491 auto Val =
MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9494 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9497 auto MaskI =
MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9502 OutPos =
MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9504 if (HasPassthru &&
I == NumElmts - 1) {
9507 auto AllLanesSelected =
MIRBuilder.buildICmp(
9509 OutPos =
MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9510 {OutPos, EndOfVector});
9514 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9516 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9521 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9523 MI.eraseFromParent();
9534 SPTmp =
MIRBuilder.buildCast(IntPtrTy, SPTmp);
9540 if (Alignment >
Align(1)) {
9543 auto AlignCst =
MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9552 const auto &MF = *
MI.getMF();
9558 Register AllocSize =
MI.getOperand(1).getReg();
9561 LLT PtrTy = MRI.getType(Dst);
9562 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9569 MI.eraseFromParent();
9575 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9580 MI.eraseFromParent();
9586 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9591 MI.eraseFromParent();
9597 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
9598 unsigned Offset =
MI.getOperand(2).getImm();
9601 if (SrcTy.isVector()) {
9602 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9603 unsigned DstSize = DstTy.getSizeInBits();
9605 if ((
Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9606 (
Offset + DstSize <= SrcTy.getSizeInBits())) {
9608 auto Unmerge =
MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9612 for (
unsigned Idx =
Offset / SrcEltSize;
9613 Idx < (
Offset + DstSize) / SrcEltSize; ++Idx) {
9614 SubVectorElts.
push_back(Unmerge.getReg(Idx));
9616 if (SubVectorElts.
size() == 1)
9617 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9619 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9621 MI.eraseFromParent();
9627 if ((SrcTy.isPointer() &&
9628 DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) ||
9629 (DstTy.isPointer() &&
9630 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace()))) {
9631 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9635 if ((DstTy.isScalar() || DstTy.isPointer()) &&
9636 (SrcTy.isScalar() || SrcTy.isPointer() ||
9637 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9638 LLT SrcIntTy = SrcTy;
9639 if (!SrcTy.isScalar()) {
9641 SrcReg =
MIRBuilder.buildCast(SrcIntTy, SrcReg).getReg(0);
9645 if (DstTy.isPointer())
9647 MRI.createGenericVirtualRegister(
LLT::scalar(DstTy.getSizeInBits()));
9653 auto Shr =
MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9657 if (DstTy.isPointer())
9660 MI.eraseFromParent();
9668 auto [Dst, Src, InsertSrc] =
MI.getFirst3Regs();
9671 LLT DstTy = MRI.getType(Src);
9672 LLT InsertTy = MRI.getType(InsertSrc);
9675 bool IsNonIntegralInsert =
9685 if ((IsNonIntegralInsert || IsNonIntegralDst) && InsertTy != EltTy) {
9686 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9693 if ((
Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9695 auto UnmergeSrc =
MIRBuilder.buildUnmerge(EltTy, Src);
9699 for (; Idx <
Offset / EltSize; ++Idx) {
9700 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9705 auto UnmergeInsertSrc =
MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9706 for (
unsigned i = 0; Idx < (
Offset + InsertSize) / EltSize;
9708 DstElts.
push_back(UnmergeInsertSrc.getReg(i));
9712 InsertSrc =
MIRBuilder.buildPtrToInt(EltTy, InsertSrc).getReg(0);
9714 InsertSrc =
MIRBuilder.buildIntToPtr(EltTy, InsertSrc).getReg(0);
9721 DstElts.
push_back(UnmergeSrc.getReg(Idx));
9724 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9725 MI.eraseFromParent();
9734 if (IsNonIntegralDst || IsNonIntegralInsert) {
9735 LLVM_DEBUG(
dbgs() <<
"Not casting non-integral address space integer\n");
9739 LLT IntDstTy = DstTy;
9743 Src =
MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9748 InsertSrc =
MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9754 ExtInsSrc =
MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9760 auto Mask =
MIRBuilder.buildConstant(IntDstTy, MaskVal);
9761 auto MaskedSrc =
MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9762 auto Or =
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9765 MI.eraseFromParent();
9771 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9772 MI.getFirst4RegLLTs();
9773 const bool IsAdd =
MI.getOpcode() == TargetOpcode::G_SADDO;
9776 LLT BoolTy = Dst1Ty;
9778 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9793 auto ResultLowerThanLHS =
9797 MIRBuilder.buildXor(Dst1, RHSNegative, ResultLowerThanLHS);
9801 auto LHSLessThanRHS =
9803 auto ResultNegative =
9805 MIRBuilder.buildXor(Dst1, LHSLessThanRHS, ResultNegative);
9809 MI.eraseFromParent();
9815 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9816 const LLT Ty = MRI.getType(Res);
9819 auto Tmp =
MIRBuilder.buildAdd(Ty, LHS, RHS);
9820 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9821 auto Sum =
MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9832 MI.eraseFromParent();
9837 auto [Res, OvOut, LHS, RHS, CarryIn] =
MI.getFirst5Regs();
9838 const LLT Ty = MRI.getType(Res);
9841 auto CarryZ =
MIRBuilder.buildZExt(Ty, CarryIn);
9842 auto RHSPlusCI =
MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9843 auto Diff =
MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9848 auto X2 =
MIRBuilder.buildXor(Ty, LHS, Diff);
9853 MI.eraseFromParent();
9859 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9860 LLT Ty = MRI.getType(Res);
9864 switch (
MI.getOpcode()) {
9867 case TargetOpcode::G_UADDSAT:
9870 BaseOp = TargetOpcode::G_ADD;
9872 case TargetOpcode::G_SADDSAT:
9875 BaseOp = TargetOpcode::G_ADD;
9877 case TargetOpcode::G_USUBSAT:
9880 BaseOp = TargetOpcode::G_SUB;
9882 case TargetOpcode::G_SSUBSAT:
9885 BaseOp = TargetOpcode::G_SUB;
9900 uint64_t NumBits = Ty.getScalarSizeInBits();
9911 auto NegOne =
MIRBuilder.buildConstant(Ty, -1);
9919 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9924 auto Min =
MIRBuilder.buildUMin(Ty, Not, RHS);
9925 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9928 MI.eraseFromParent();
9934 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
9935 LLT Ty = MRI.getType(Res);
9939 unsigned OverflowOp;
9940 switch (
MI.getOpcode()) {
9943 case TargetOpcode::G_UADDSAT:
9946 OverflowOp = TargetOpcode::G_UADDO;
9948 case TargetOpcode::G_SADDSAT:
9951 OverflowOp = TargetOpcode::G_SADDO;
9953 case TargetOpcode::G_USUBSAT:
9956 OverflowOp = TargetOpcode::G_USUBO;
9958 case TargetOpcode::G_SSUBSAT:
9961 OverflowOp = TargetOpcode::G_SSUBO;
9966 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9967 Register Tmp = OverflowRes.getReg(0);
9968 Register Ov = OverflowRes.getReg(1);
9977 uint64_t NumBits = Ty.getScalarSizeInBits();
9978 auto ShiftAmount =
MIRBuilder.buildConstant(Ty, NumBits - 1);
9979 auto Sign =
MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9982 Clamp =
MIRBuilder.buildAdd(Ty, Sign, MinVal);
9990 Clamp =
MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9994 MI.eraseFromParent();
10000 assert((
MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
10001 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
10002 "Expected shlsat opcode!");
10003 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SSHLSAT;
10004 auto [Res, LHS, RHS] =
MI.getFirst3Regs();
10005 LLT Ty = MRI.getType(Res);
10009 auto Result =
MIRBuilder.buildShl(Ty, LHS, RHS);
10010 auto Orig = IsSigned ?
MIRBuilder.buildAShr(Ty, Result, RHS)
10019 SatVal =
MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
10024 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
10026 MI.eraseFromParent();
10031 auto [Dst, Src] =
MI.getFirst2Regs();
10032 const LLT Ty = MRI.getType(Src);
10033 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
10034 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
10037 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt);
10038 auto LSByteShiftedLeft =
MIRBuilder.buildShl(Ty, Src, ShiftAmt);
10039 auto MSByteShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10040 auto Res =
MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
10043 for (
unsigned i = 1; i < SizeInBytes / 2; ++i) {
10045 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
10046 auto Mask =
MIRBuilder.buildConstant(Ty, APMask);
10047 auto ShiftAmt =
MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
10049 auto LoByte =
MIRBuilder.buildAnd(Ty, Src, Mask);
10050 auto LoShiftedLeft =
MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
10051 Res =
MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
10053 auto SrcShiftedRight =
MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
10054 auto HiShiftedRight =
MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
10055 Res =
MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
10057 Res.getInstr()->getOperand(0).setReg(Dst);
10059 MI.eraseFromParent();
10066 const LLT Ty = Dst.getLLTTy(*
B.getMRI());
10069 auto LHS =
B.buildLShr(Ty,
B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
10070 auto RHS =
B.buildAnd(Ty,
B.buildShl(Ty, Src, C_N), MaskLoNTo0);
10071 return B.buildOr(Dst,
LHS,
RHS);
10076 auto [Dst, Src] =
MI.getFirst2Regs();
10077 const LLT SrcTy = MRI.getType(Src);
10078 unsigned Size = SrcTy.getScalarSizeInBits();
10079 unsigned VSize = SrcTy.getSizeInBits();
10082 if (SrcTy.isVector() && (VSize % 8 == 0) &&
10083 (LI.isLegal({TargetOpcode::G_BITREVERSE,
10084 {LLT::fixed_vector(VSize / 8, LLT::integer(8)),
10085 LLT::fixed_vector(VSize / 8, LLT::integer(8))}}))) {
10090 auto BSWAP =
MIRBuilder.buildBSwap(SrcTy, Src);
10091 auto Cast =
MIRBuilder.buildBitcast(VTy, BSWAP);
10092 auto RBIT =
MIRBuilder.buildBitReverse(VTy, Cast);
10096 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
10119 for (
unsigned I = 0, J =
Size - 1;
I <
Size; ++
I, --J) {
10123 Tmp2 = MIRBuilder.
buildShl(SrcTy, Src, ShAmt);
10126 Tmp2 = MIRBuilder.
buildLShr(SrcTy, Src, ShAmt);
10130 Tmp2 = MIRBuilder.
buildAnd(SrcTy, Tmp2, Mask);
10134 Tmp = MIRBuilder.
buildOr(SrcTy, Tmp, Tmp2);
10139 MI.eraseFromParent();
10147 bool IsRead =
MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
10148 int NameOpIdx = IsRead ? 1 : 0;
10149 int ValRegIndex = IsRead ? 0 : 1;
10151 Register ValReg =
MI.getOperand(ValRegIndex).getReg();
10152 const LLT Ty = MRI.getType(ValReg);
10154 cast<MDNode>(
MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
10161 (IsRead ?
"llvm.read_register" :
"llvm.write_register"),
10162 Fn,
MI.getDebugLoc()));
10166 MI.eraseFromParent();
10175 MI.eraseFromParent();
10181 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SMULH;
10182 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
10183 Register Result =
MI.getOperand(0).getReg();
10184 LLT OrigTy = MRI.getType(Result);
10188 auto LHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(1)});
10189 auto RHS =
MIRBuilder.buildInstr(ExtOp, {WideTy}, {
MI.getOperand(2)});
10191 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
10193 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, SizeInBits);
10194 auto Shifted =
MIRBuilder.buildInstr(ShiftOp, {WideTy}, {
Mul, ShiftAmt});
10197 MI.eraseFromParent();
10203 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10208 MI.eraseFromParent();
10213 MI.eraseFromParent();
10220 unsigned BitSize = SrcTy.getScalarSizeInBits();
10224 auto AsInt = SrcTy == IntTy ?
MIRBuilder.buildCopy(IntTy, SrcReg)
10231 APInt ExpMask = Inf;
10233 APInt QNaNBitMask =
10237 auto SignBitC =
MIRBuilder.buildConstant(IntTy, SignBit);
10238 auto ValueMaskC =
MIRBuilder.buildConstant(IntTy, ValueMask);
10239 auto InfC =
MIRBuilder.buildConstant(IntTy, Inf);
10240 auto ExpMaskC =
MIRBuilder.buildConstant(IntTy, ExpMask);
10241 auto ZeroC =
MIRBuilder.buildConstant(IntTy, 0);
10243 auto Abs =
MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
10247 auto Res =
MIRBuilder.buildConstant(DstTy, 0);
10249 LLT DstTyCopy = DstTy;
10251 Res =
MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
10279 auto ExpBits =
MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
10282 Mask &= ~PartialCheck;
10291 else if (PartialCheck ==
fcZero)
10303 auto OneC =
MIRBuilder.buildConstant(IntTy, 1);
10304 auto VMinusOne =
MIRBuilder.buildSub(IntTy, V, OneC);
10305 auto SubnormalRes =
10307 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
10309 SubnormalRes =
MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
10310 appendToRes(SubnormalRes);
10317 else if (PartialCheck ==
fcInf)
10322 auto NegInfC =
MIRBuilder.buildConstant(IntTy, NegInf);
10329 auto InfWithQnanBitC =
MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10330 if (PartialCheck ==
fcNan) {
10334 }
else if (PartialCheck ==
fcQNan) {
10344 Abs, InfWithQnanBitC);
10345 appendToRes(
MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10352 APInt ExpLSB = ExpMask & ~(ExpMask.
shl(1));
10354 IntTy, Abs,
MIRBuilder.buildConstant(IntTy, ExpLSB));
10355 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10358 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10360 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10363 DstTy, Sign,
MIRBuilder.buildConstant(DstTy, InversionMask));
10364 NormalRes =
MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10366 appendToRes(NormalRes);
10370 MI.eraseFromParent();
10376 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10377 MI.getFirst4RegLLTs();
10386 Op1Reg =
MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10387 Op1Ty = MRI.getType(Op1Reg);
10388 Op2Reg =
MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10389 Op2Ty = MRI.getType(Op2Reg);
10393 if (MaskTy.isScalar()) {
10401 MaskElt =
MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10404 MaskTy = DstTy.changeElementType(
LLT::integer(DstTy.getScalarSizeInBits()));
10406 MIRBuilder.buildSExtOrTrunc(MaskTy.getScalarType(), MaskElt).getReg(0);
10408 if (DstTy.isVector()) {
10410 auto ShufSplat =
MIRBuilder.buildShuffleSplat(MaskTy, MaskElt);
10411 MaskReg = ShufSplat.getReg(0);
10415 }
else if (!DstTy.isVector()) {
10420 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10424 if (!Op1Ty.getScalarType().isAnyScalar() &&
10425 !Op1Ty.getScalarType().isInteger())
10426 Op1Reg =
MIRBuilder.buildBitcast(Op1TyInt, Op1Reg).getReg(0);
10428 if (!Op2Ty.getScalarType().isAnyScalar() &&
10429 !Op2Ty.getScalarType().isInteger()) {
10431 Op2Ty.changeElementType(
LLT::integer(Op2Ty.getScalarSizeInBits()));
10432 Op2Reg =
MIRBuilder.buildBitcast(Op2TyInt, Op2Reg).getReg(0);
10435 auto NotMask =
MIRBuilder.buildNot(MaskTy, MaskReg);
10436 auto NewOp1 =
MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10437 auto NewOp2 =
MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10442 if (DstTy == Op1TyInt)
10445 auto Or =
MIRBuilder.buildOr(Op1TyInt, NewOp1, NewOp2);
10449 MI.eraseFromParent();
10455 unsigned Opcode =
MI.getOpcode();
10458 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10459 : TargetOpcode::G_UDIV,
10460 {
MI.getOperand(0).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10462 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10463 : TargetOpcode::G_UREM,
10464 {
MI.getOperand(1).getReg()}, {
MI.getOperand(2),
MI.getOperand(3)});
10465 MI.eraseFromParent();
10475 LLT DstTy = MRI.getType(
MI.getOperand(0).getReg());
10479 auto Shift =
MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10482 MI.eraseFromParent();
10492 Register SrcReg =
MI.getOperand(1).getReg();
10493 LLT Ty = MRI.getType(SrcReg);
10494 auto Zero =
MIRBuilder.buildConstant(Ty, 0);
10497 MI.eraseFromParent();
10503 Register SrcReg =
MI.getOperand(1).getReg();
10504 Register DestReg =
MI.getOperand(0).getReg();
10506 auto Zero =
MIRBuilder.buildConstant(Ty, 0).getReg(0);
10507 auto Sub =
MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10510 MI.eraseFromParent();
10516 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10517 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10518 "Expected G_ABDS or G_ABDU instruction");
10520 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10521 LLT Ty = MRI.getType(LHS);
10531 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10533 MI.eraseFromParent();
10539 assert((
MI.getOpcode() == TargetOpcode::G_ABDS ||
10540 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10541 "Expected G_ABDS or G_ABDU instruction");
10543 auto [DstReg, LHS, RHS] =
MI.getFirst3Regs();
10544 LLT Ty = MRI.getType(LHS);
10549 if (
MI.getOpcode() == TargetOpcode::G_ABDS) {
10550 MaxReg =
MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10551 MinReg =
MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10553 MaxReg =
MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10554 MinReg =
MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10556 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10558 MI.eraseFromParent();
10563 auto [DstReg, DstTy, SrcReg, SrcTy] =
MI.getFirst2RegLLTs();
10568 if (!(SrcTy.getScalarType().isAnyScalar() ||
10569 SrcTy.getScalarType().isInteger())) {
10571 SrcTy.changeElementType(
LLT::integer(SrcTy.getScalarSizeInBits()));
10572 CastedSrc =
MIRBuilder.buildBitcast(SrcTyInt, SrcReg).getReg(0);
10575 if (MRI.getType(DstReg) != TyInt) {
10579 .buildAnd(TyInt, CastedSrc,
10582 DstTy.getScalarSizeInBits())))
10594 MI.eraseFromParent();
10600 Register SrcReg =
MI.getOperand(1).getReg();
10601 LLT SrcTy = MRI.getType(SrcReg);
10602 LLT DstTy = MRI.getType(SrcReg);
10605 if (SrcTy.isScalar()) {
10610 MI.setDesc(
MIRBuilder.getTII().get(TargetOpcode::COPY));
10621 Register ListPtr =
MI.getOperand(1).getReg();
10622 LLT PtrTy = MRI.getType(ListPtr);
10629 auto VAList =
MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10631 const Align A(
MI.getOperand(2).getImm());
10633 if (
A > TLI.getMinStackArgumentAlignment()) {
10635 MIRBuilder.buildConstant(PtrTyAsScalarTy,
A.value() - 1).getReg(0);
10636 auto AddDst =
MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10637 auto AndDst =
MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst,
Log2(
A));
10638 VAList = AndDst.getReg(0);
10645 LLT LLTTy = MRI.getType(Dst);
10648 MIRBuilder.buildConstant(PtrTyAsScalarTy,
DL.getTypeAllocSize(Ty));
10649 auto Succ =
MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10654 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10656 Align EltAlignment =
DL.getABITypeAlign(Ty);
10659 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10661 MI.eraseFromParent();
10666 [[maybe_unused]]
unsigned OpCode =
MI.getOpcode();
10667 assert((OpCode == TargetOpcode::G_SMULFIX ||
10668 OpCode == TargetOpcode::G_UMULFIX) &&
10669 "Operator must be either G_SMULFIX or G_UMULFIX!");
10670 auto [Dst, LHS, RHS] =
MI.getFirst3Regs();
10671 LLT Ty = MRI.getType(Dst);
10672 unsigned Scale =
MI.getOperand(3).getImm();
10676 MI.eraseFromParent();
10682 auto ShiftAmt =
MIRBuilder.buildConstant(WideTy, Scale);
10684 if (
MI.getOpcode() == TargetOpcode::G_SMULFIX) {
10693 if (
MI.getOpcode() == TargetOpcode::G_SMULFIX)
10700 MI.eraseFromParent();
10715 unsigned Limit,
const MemOp &
Op,
10716 unsigned DstAS,
unsigned SrcAS,
10717 const AttributeList &FuncAttributes,
10719 if (
Op.isMemcpyWithFixedDstAlign() &&
Op.getSrcAlign() <
Op.getDstAlign())
10729 if (
Op.isFixedDstAlign())
10730 while (
Op.getDstAlign() < Ty.getSizeInBytes() &&
10733 assert(Ty.getSizeInBits() > 0 &&
"Could not find valid type");
10737 unsigned NumMemOps = 0;
10740 unsigned TySize = Ty.getSizeInBytes();
10741 while (TySize >
Size) {
10751 assert(NewTySize > 0 &&
"Could not find appropriate type");
10758 if (NumMemOps &&
Op.allowOverlap() && NewTySize <
Size &&
10760 VT, DstAS,
Op.isFixedDstAlign() ?
Op.getDstAlign() :
Align(1),
10766 TySize = NewTySize;
10770 if (++NumMemOps > Limit)
10773 MemOps.push_back(Ty);
10783 unsigned NumBits = Ty.getScalarSizeInBits();
10785 if (!Ty.isVector() && ValVRegAndVal) {
10786 APInt Scalar = ValVRegAndVal->Value.
trunc(8);
10794 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10815 uint64_t KnownLen,
Align Alignment,
10817 auto &MF = *
MI.getParent()->getParent();
10822 assert(KnownLen != 0 &&
"Have a zero length memset length!");
10824 bool DstAlignCanChange =
false;
10828 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10830 DstAlignCanChange =
true;
10832 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10833 std::vector<LLT> MemOps;
10835 const auto &DstMMO = **
MI.memoperands_begin();
10836 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10839 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10850 if (DstAlignCanChange) {
10853 Align NewAlign =
DL.getABITypeAlign(IRTy);
10854 if (NewAlign > Alignment) {
10855 Alignment = NewAlign;
10863 MachineIRBuilder MIB(
MI);
10865 LLT LargestTy = MemOps[0];
10866 for (
unsigned i = 1; i < MemOps.size(); i++)
10868 LargestTy = MemOps[i];
10880 LLT PtrTy = MRI.getType(Dst);
10881 unsigned DstOff = 0;
10882 unsigned Size = KnownLen;
10883 for (
unsigned I = 0;
I < MemOps.size();
I++) {
10884 LLT Ty = MemOps[
I];
10886 if (TySize >
Size) {
10889 assert(
I == MemOps.size() - 1 &&
I != 0);
10890 DstOff -= TySize -
Size;
10900 TLI.isTruncateFree(LargestVT, VT))
10901 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10914 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst,
Offset).getReg(0);
10917 MIB.buildStore(
Value, Ptr, *StoreMMO);
10922 MI.eraseFromParent();
10928 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10930 auto [Dst, Src, Len] =
MI.getFirst3Regs();
10932 const auto *MMOIt =
MI.memoperands_begin();
10934 bool IsVolatile =
MemOp->isVolatile();
10940 "inline memcpy with dynamic size is not yet supported");
10941 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10942 if (KnownLen == 0) {
10943 MI.eraseFromParent();
10947 const auto &DstMMO = **
MI.memoperands_begin();
10948 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10949 Align DstAlign = DstMMO.getBaseAlign();
10950 Align SrcAlign = SrcMMO.getBaseAlign();
10952 return lowerMemcpyInline(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10959 Align SrcAlign,
bool IsVolatile) {
10960 assert(
MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10961 return lowerMemcpy(
MI, Dst, Src, KnownLen,
10962 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10969 Align SrcAlign,
bool IsVolatile) {
10970 auto &MF = *
MI.getParent()->getParent();
10975 assert(KnownLen != 0 &&
"Have a zero length memcpy length!");
10977 bool DstAlignCanChange =
false;
10979 Align Alignment = std::min(DstAlign, SrcAlign);
10983 DstAlignCanChange =
true;
10989 std::vector<LLT> MemOps;
10991 const auto &DstMMO = **
MI.memoperands_begin();
10992 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
10998 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
11004 if (DstAlignCanChange) {
11007 Align NewAlign =
DL.getABITypeAlign(IRTy);
11012 if (!
TRI->hasStackRealignment(MF))
11014 NewAlign = std::min(NewAlign, *StackAlign);
11016 if (NewAlign > Alignment) {
11017 Alignment = NewAlign;
11025 LLVM_DEBUG(
dbgs() <<
"Inlining memcpy: " <<
MI <<
" into loads & stores\n");
11027 MachineIRBuilder MIB(
MI);
11033 unsigned CurrOffset = 0;
11034 unsigned Size = KnownLen;
11035 for (
auto CopyTy : MemOps) {
11038 if (CopyTy.getSizeInBytes() >
Size)
11039 CurrOffset -= CopyTy.getSizeInBytes() -
Size;
11050 if (CurrOffset != 0) {
11051 LLT SrcTy = MRI.getType(Src);
11055 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11057 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
11061 if (CurrOffset != 0) {
11062 LLT DstTy = MRI.getType(Dst);
11063 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11065 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
11066 CurrOffset += CopyTy.getSizeInBytes();
11067 Size -= CopyTy.getSizeInBytes();
11070 MI.eraseFromParent();
11076 uint64_t KnownLen,
Align DstAlign,
Align SrcAlign,
11078 auto &MF = *
MI.getParent()->getParent();
11083 assert(KnownLen != 0 &&
"Have a zero length memmove length!");
11085 bool DstAlignCanChange =
false;
11088 Align Alignment = std::min(DstAlign, SrcAlign);
11090 MachineInstr *FIDef =
getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
11092 DstAlignCanChange =
true;
11094 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
11095 std::vector<LLT> MemOps;
11097 const auto &DstMMO = **
MI.memoperands_begin();
11098 const auto &SrcMMO = **std::next(
MI.memoperands_begin());
11099 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
11100 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
11107 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
11113 if (DstAlignCanChange) {
11116 Align NewAlign =
DL.getABITypeAlign(IRTy);
11121 if (!
TRI->hasStackRealignment(MF))
11122 if (MaybeAlign StackAlign =
DL.getStackAlignment())
11123 NewAlign = std::min(NewAlign, *StackAlign);
11125 if (NewAlign > Alignment) {
11126 Alignment = NewAlign;
11134 LLVM_DEBUG(
dbgs() <<
"Inlining memmove: " <<
MI <<
" into loads & stores\n");
11136 MachineIRBuilder MIB(
MI);
11140 unsigned CurrOffset = 0;
11141 SmallVector<Register, 16> LoadVals;
11142 for (
auto CopyTy : MemOps) {
11149 if (CurrOffset != 0) {
11150 LLT SrcTy = MRI.getType(Src);
11153 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src,
Offset).getReg(0);
11155 LoadVals.
push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
11156 CurrOffset += CopyTy.getSizeInBytes();
11160 for (
unsigned I = 0;
I < MemOps.size(); ++
I) {
11161 LLT CopyTy = MemOps[
I];
11167 if (CurrOffset != 0) {
11168 LLT DstTy = MRI.getType(Dst);
11171 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst,
Offset).getReg(0);
11173 MIB.buildStore(LoadVals[
I], StorePtr, *StoreMMO);
11176 MI.eraseFromParent();
11182 const unsigned Opc =
MI.getOpcode();
11185 assert((
Opc == TargetOpcode::G_MEMCPY ||
Opc == TargetOpcode::G_MEMMOVE ||
11186 Opc == TargetOpcode::G_MEMSET) &&
11187 "Expected memcpy like instruction");
11189 auto MMOIt =
MI.memoperands_begin();
11194 auto [Dst, Src, Len] =
MI.getFirst3Regs();
11196 if (
Opc != TargetOpcode::G_MEMSET) {
11197 assert(MMOIt !=
MI.memoperands_end() &&
"Expected a second MMO on MI");
11198 MemOp = *(++MMOIt);
11199 SrcAlign =
MemOp->getBaseAlign();
11204 if (!LenVRegAndVal)
11206 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
11208 if (KnownLen == 0) {
11209 MI.eraseFromParent();
11213 if (MaxLen && KnownLen > MaxLen)
11216 bool IsVolatile =
MemOp->isVolatile();
11217 if (
Opc == TargetOpcode::G_MEMCPY) {
11218 auto &MF = *
MI.getParent()->getParent();
11221 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
11222 return lowerMemcpy(
MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
11225 if (
Opc == TargetOpcode::G_MEMMOVE)
11226 return lowerMemmove(
MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
11227 if (
Opc == TargetOpcode::G_MEMSET)
11228 return lowerMemset(
MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
This file describes how to lower LLVM code to machine code.
static const fltSemantics & IEEEsingle()
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble()
static constexpr roundingMode rmNearestTiesToEven
opStatus
IEEE-754R 7: Default exception handling.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
AttributeList getAttributes() const
Return the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getValueReg() const
Get the stored value register.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents an insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a three-way compare.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
static constexpr LLT float64()
Get a 64-bit IEEE double value.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
LLT getScalarType() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr ElementCount getElementCount() const
static constexpr LLT float16()
Get a 16-bit IEEE half value.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr bool isPointerOrPointerVector() const
static LLT integer(unsigned SizeInBits)
static constexpr LLT bfloat16()
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
static constexpr LLT float32()
Get a 32-bit IEEE float value.
static LLT floatIEEE(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
This is an important class for using LLVM in a threaded context.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizerHelper::LegalizeResult createAtomicLibcall(MachineInstr &MI) const
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTLS(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B, const LibcallLoweringInfo *Libcalls=nullptr)
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPExtAndTruncMem(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMODF(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F32_TO_BF16(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI LegalizeResult conversionLibcall(MachineInstr &MI, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, bool IsSigned=false) const
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult createMemLibcall(MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Create a libcall to memcpy et al.
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMulfix(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult createLibcall(const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr) const
Helper function that creates a libcall to the given Name using the given calling convention CC.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LLVM_ABI LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver) const
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
Tracks which library functions to use for a particular subtarget.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
LLVM_ABI StringRef getString() const
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elements or scalar bitwidth.
unsigned M1(unsigned Val)
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
auto dyn_cast_or_null(const Y &Val)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
@ Success
The lock was released successfully.
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
To bit_cast(const From &from) noexcept
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its APInt value and def register.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
@ Custom
The result value requires a custom uniformity check.
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be a zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.