#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  if (LeftoverSize == 0)
    return {NumParts, 0};
  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }
  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}
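// Worked example (illustrative, not from the original source): breaking s84
// down with NarrowTy = s32 gives NumParts = 2 and a 20-bit leftover, so
// LeftoverTy becomes s20 and the function returns {2, 1}; an exact fit such
// as s64 into s32 returns {2, 0} and leaves LeftoverTy untouched.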
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B, GISelKnownBits *KB)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
  if (isa<GIntrinsic>(MI))
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  // ...
  case Bitcast:
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    return lower(MI, Step.TypeIdx, Step.NewType);
void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs, LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  // ...
  assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
  SmallVector<Register, 8> AllRegs(PartRegs.begin(), PartRegs.end());
  AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
  return mergeMixedSubvectors(DstReg, AllRegs);
  // ...
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}
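// Note (added for exposition): insertParts is the inverse of extractParts.
// The vector fast path above re-merges same-typed pieces directly; the
// general path splits every piece to a common GCD type, pads the pieces out
// to the LCM type, and then extracts the destination bits from the wide
// re-merge.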
void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
                                           ArrayRef<Register> PartRegs) {
  SmallVector<Register, 8> AllElts;
  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
    appendVectorElts(AllElts, PartRegs[i]);

  Register Leftover = PartRegs[PartRegs.size() - 1];
  if (!MRI.getType(Leftover).isVector())
    AllElts.push_back(Leftover);
  else
    appendVectorElts(AllElts, Leftover);

  MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
}
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}

void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // The source already evenly divides the result type.
    Parts.push_back(SrcReg);
  } else {
    // Split into common-type-sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                    LLT DstTy, LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}
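// Worked example (illustrative): extracting from an s96 source with an s64
// NarrowTy and an s32 destination computes GCDTy = gcd(gcd(96, 64), 32) =
// s32, so the source is unmerged into three s32 pieces from which both the
// narrow type and the destination type can be rebuilt.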
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);
      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);
  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];
      // There are meaningful bits here, so track that.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
    }

    if (AllPadReg) {
      // Avoid creating additional instructions for copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}
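// Note (added for exposition): PadStrategy selects what goes into the unused
// high parts of the LCM-typed merge. G_ZEXT pads with zero constants,
// G_ANYEXT pads with undef, and G_SEXT replicates the sign bit of the highest
// original piece, matching the semantics of the extension being modeled.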
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits
  // into the result.
  if (DstTy == LCMTy) {
    MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
    return;
  }
  // ...
  unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
  SmallVector<Register, 8> UnmergeDefs(NumDefs);
  UnmergeDefs[0] = DstReg;
  for (unsigned I = 1; I != NumDefs; ++I)
    UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
  // ...
}
#define RTLIBCASE_INT(LibcallPrefix) \
  do { \
    switch (Size) { \
    case 32: \
      return RTLIB::LibcallPrefix##32; \
    case 64: \
      return RTLIB::LibcallPrefix##64; \
    case 128: \
      return RTLIB::LibcallPrefix##128; \
    default: \
      llvm_unreachable("unexpected size"); \
    } \
  } while (0)

#define RTLIBCASE(LibcallPrefix) \
  do { \
    switch (Size) { \
    case 32: \
      return RTLIB::LibcallPrefix##32; \
    case 64: \
      return RTLIB::LibcallPrefix##64; \
    case 80: \
      return RTLIB::LibcallPrefix##80; \
    case 128: \
      return RTLIB::LibcallPrefix##128; \
    default: \
      llvm_unreachable("unexpected size"); \
    } \
  } while (0)
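// Usage note (added for exposition): RTLIBCASE(ADD_F) expands to a switch on
// the local variable Size that yields RTLIB::ADD_F32, RTLIB::ADD_F64,
// RTLIB::ADD_F80 or RTLIB::ADD_F128 — the float variant also covers the
// 80-bit x87 type — while RTLIBCASE_INT only handles 32/64/128-bit integers.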
  case TargetOpcode::G_MUL:
    RTLIBCASE_INT(MUL_I);
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FEXP10:
    RTLIBCASE(EXP10_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FPOWI:
    RTLIBCASE(POWI_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FTAN:
    RTLIBCASE(TAN_F);
  case TargetOpcode::G_FASIN:
    RTLIBCASE(ASIN_F);
  case TargetOpcode::G_FACOS:
    RTLIBCASE(ACOS_F);
  case TargetOpcode::G_FATAN:
    RTLIBCASE(ATAN_F);
  case TargetOpcode::G_FATAN2:
    RTLIBCASE(ATAN2_F);
  case TargetOpcode::G_FSINH:
    RTLIBCASE(SINH_F);
  case TargetOpcode::G_FCOSH:
    RTLIBCASE(COSH_F);
  case TargetOpcode::G_FTANH:
    RTLIBCASE(TANH_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FLDEXP:
    RTLIBCASE(LDEXP_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    RTLIBCASE(TRUNC_F);
  case TargetOpcode::G_INTRINSIC_ROUND:
    RTLIBCASE(ROUND_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  case TargetOpcode::G_INTRINSIC_LRINT:
    RTLIBCASE(LRINT_F);
  case TargetOpcode::G_INTRINSIC_LLRINT:
    RTLIBCASE(LLRINT_F);
  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  if (MI.getOpcode() == TargetOpcode::G_BZERO)
    return false;
  // ...
  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
    return false;

  Register PReg = Next->getOperand(0).getReg();
  // ...
  if (Ret->getNumImplicitOperands() != 1)
    return false;

  if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
    return false;
  // ...
  Info.OrigRet = Result;
  if (MI)
    Info.IsTailCall =
        (Result.Ty->isVoidTy() ||
         Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
        isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
                                *MIRBuilder.getMRI());

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (MI && Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // ...
    do {
      MachineInstr *Next = MI->getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      Next->eraseFromParent();
    } while (MI->getNextNode());
    Args.push_back({MO.getReg(), OpType, 0});
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args,
                       LocObserver, &MI);
}

// ...
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }

  const char *Name = TLI.getLibcallName(RTLibcall);
  // ...
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  // ...
  Info.IsTailCall =
      MI.getOperand(MI.getNumOperands() - 1).getImm() &&
      isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // ...
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      Next->eraseFromParent();
    } while (MI.getNextNode());
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  auto &AtomicMI = cast<GMemOperation>(MI);
  auto &MMO = AtomicMI.getMMO();
  auto Ordering = MMO.getMergedOrdering();
  LLT MemType = MMO.getMemoryType();
  uint64_t MemSize = MemType.getSizeInBytes();
  if (MemType.isVector())
    return RTLIB::UNKNOWN_LIBCALL;

#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
#define LCALL5(A) \
  LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XCHG: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_AND: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_OR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  case TargetOpcode::G_ATOMICRMW_XOR: {
    const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
    return getOutlineAtomicHelper(LC, Ordering, MemSize);
  }
  default:
    return RTLIB::UNKNOWN_LIBCALL;
  }
}
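// Note (added for exposition): the LCALLS/LCALL5 tables index the outlined
// atomic helpers by memory ordering (relaxed, acquire, release, acq_rel) and
// by access size (1/2/4/8/16 bytes); for example, a 4-byte acquire cmpxchg
// resolves to RTLIB::OUTLINE_ATOMIC_CAS4_ACQ, the __aarch64_cas4_acq helper
// used by AArch64 outline atomics.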
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
        MI.getFirst4RegLLTs();
    Register Success;
    LLT SuccessLLT;
    if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
      std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
               NewLLT) = MI.getFirst5RegLLTs();
    }
    // ...
    break;
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR: {
    auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
    // The outlined AND helper is an LDCLR, and SUB is lowered through LDADD,
    // so invert/negate the value operand accordingly.
    if (Opc == TargetOpcode::G_ATOMICRMW_AND)
      Val = MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1),
                                Val)
                .getReg(0);
    else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
      Val = MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0),
                                Val)
                .getReg(0);
    // ...
    break;
  }
  }

  const char *Name = TLI.getLibcallName(RTLibcall);
  // ...
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  // ...
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  // ...

  CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
  if (FromType->isIntegerTy()) {
    if (IsSigned)
      Arg.Flags[0].setSExt();
    else
      Arg.Flags[0].setZExt();
  }
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
                       &MI);
  RTLIB::Libcall RTLibcall;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_GET_FPENV:
    RTLibcall = RTLIB::FEGETENV;
    break;
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_RESET_FPENV:
    RTLibcall = RTLIB::FESETENV;
    break;
  case TargetOpcode::G_GET_FPMODE:
    RTLibcall = RTLIB::FEGETMODE;
    break;
  case TargetOpcode::G_SET_FPMODE:
  case TargetOpcode::G_RESET_FPMODE:
    RTLibcall = RTLIB::FESETMODE;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }
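// Note (added for exposition): the reset variants reuse the same
// fesetenv/fesetmode entry points as the plain set operations; they are
// lowered by passing the default environment/mode rather than through a
// separate runtime helper.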
  auto &Ctx = MF.getFunction().getContext();
  // ...
  // The state is read/written through a stack temporary.
  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
  // ...
  return createLibcall(MIRBuilder, RTLibcall, /* ... */ LocObserver, nullptr);

// ...
  auto &Ctx = MF.getFunction().getContext();
  // ...
  unsigned TempAddrSpace = DL.getAllocaAddrSpace();
  // ...
  return createLibcall(MIRBuilder, RTLibcall, /* ... */ LocObserver, nullptr);
static std::pair<RTLIB::Libcall, CmpInst::Predicate>
getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
  do { \
    switch (Size) { \
    case 32: \
      return {RTLIB::LibcallPrefix##32, ICmpPred}; \
    case 64: \
      return {RTLIB::LibcallPrefix##64, ICmpPred}; \
    case 128: \
      return {RTLIB::LibcallPrefix##128, ICmpPred}; \
    default: \
      llvm_unreachable("unexpected size"); \
    } \
  } while (0)
  const auto Cond = Cmp->getCond();
  // BuildLibcall emits one comparison libcall taking the compare operands as
  // arguments:
  //   createLibcall(..., {{Cmp->getLHSReg(), OpType, 0},
  //                       {Cmp->getRHSReg(), OpType, 1}}, ...)
  // and then a G_ICMP of its integer result against zero.
  // ...
  // Simple case: there is a direct mapping from the predicate to a libcall.
  if (Libcall != RTLIB::UNKNOWN_LIBCALL &&
      ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
    if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
      return Legalized;
    }
    return UnableToLegalize;
  }
  // ...
  // FCMP_UEQ: convert into (FCMP_OEQ || FCMP_UNO).
  const auto [OeqLibcall, OeqPred] =
      getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
  const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);

  const auto [UnoLibcall, UnoPred] =
      getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
  const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
  // ...
  // FCMP_ONE: convert into (!FCMP_OEQ && !FCMP_UNO), inverting the
  // predicates instead of emitting NOTs.
  const auto [OeqLibcall, OeqPred] =
      getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
  // ...
  const auto [UnoLibcall, UnoPred] =
      getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
  // ...
  if (NotOeq && NotUno)
    MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
  // ...
  // Remaining unordered predicates: convert into !(inverse(Pred)).
  const auto [InversedLibcall, InversedPred] =
      getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
  if (!BuildLibcall(InversedLibcall,
                    CmpInst::getInversePredicate(InversedPred), DstReg))
    return UnableToLegalize;
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  // ...
  unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
  switch (MI.getOpcode()) {
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    // ...
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    // ...
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
    // ...
    LegalizeResult Status =
        createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
                      {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP: {
    // ...
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
    // ...
    SmallVector<CallLowering::ArgInfo, 2> Args = {
        {MI.getOperand(1).getReg(), HLTy, 0},
        {MI.getOperand(2).getReg(), ITy, 1}};
    Args[1].Flags[0].setSExt();
    LegalizeResult Status =
        createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
                      Args, LocObserver, &MI);
    // ...
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    // ...
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    // ...
  }
  case TargetOpcode::G_FCMP: {
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // ...
    if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
      return UnableToLegalize;
    // ...
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // ...
    if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
      return UnableToLegalize;
    bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy,
                                              LocObserver, TLI, IsSigned);
    // ...
  }
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMIC_CMPXCHG:
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    // ...
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_GET_FPENV:
  case TargetOpcode::G_GET_FPMODE: {
    // ...
  }
  case TargetOpcode::G_SET_FPENV:
  case TargetOpcode::G_SET_FPMODE: {
    // ...
  }
  case TargetOpcode::G_RESET_FPENV:
  case TargetOpcode::G_RESET_FPMODE: {
    // ...
  }
  }

  MI.eraseFromParent();
  return Legalized;
}
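// Note (added for exposition): every case in libcall() follows the same
// shape: derive IR types for the operands, pick an RTLIB:: entry, emit the
// call through createLibcall()/createMemLibcall(), and erase the original
// generic instruction once the call has replaced it.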
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    // ...
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      // ...
      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy, Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
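// Worked example (illustrative): narrowing a 96-bit G_CONSTANT with
// NarrowTy = s32 emits three s32 constants holding bits [0,32), [32,64) and
// [64,96) of the value; if the total size were 80 bits instead, the loop
// would emit two s32 parts and insertParts would receive one s16 leftover.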
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    // ...
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
  case TargetOpcode::G_FREEZE: {
    // ...
    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
    SmallVector<Register, 8> Parts;
    for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i)
      Parts.push_back(
          MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
              .getReg(0));

    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    LLT DstTy = MRI.getType(DstReg);
    // ...
    if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      LoadMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits().getValue();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (isa<GZExtLoad>(LoadMI))
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();
    LLT SrcTy = MRI.getType(SrcReg);
    // ...
    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
      StoreMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;
    // ...
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // ...
  case TargetOpcode::G_PHI: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    // Split each incoming value at the end of its predecessor block.
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      // ...
    }
    // Build one narrow PHI per part, then re-merge into the original def.
    for (unsigned i = 0; i < NumParts; ++i) {
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    // ...
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
      return UnableToLegalize;
    // Equality: XOR corresponding parts and OR the results.
    if (ICmpInst::isEquality(Pred)) {
      SmallVector<Register, 4> Xors;
      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        Xors.push_back(MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0));
      }

      // Build a G_XOR for each leftover register. Each G_XOR must be widened
      // to the desired narrow type so that we can OR them together later.
      SmallVector<Register, 4> WidenedXors;
      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            TargetOpcode::G_ZEXT);
        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
      }

      // OR the partial results and compare against zero.
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
      // ...
    } else {
      // Ordered predicates: chain part-wise compares from low to high.
      for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
        // The last part is folded directly into the destination register.
        if (I == E - 1 && LHSLeftoverRegs.empty()) {
          // ...
          MIRBuilder.buildICmp(Pred, Dst,
                               LHSPartRegs[I], RHSPartRegs[I]);
        } else {
          // ...
        }
      }
      for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
        if (I == E - 1 && LHSLeftoverRegs.empty()) {
          // ...
        } else {
          // Compare LHSLeftoverRegs[I] against RHSLeftoverRegs[I], carrying
          // the partial result forward:
          //   ... LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
          // ...
        }
      }
    }
    MI.eraseFromParent();
    return Legalized;
  }
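// Note (added for exposition): equality compares are narrowed by XOR-ing
// corresponding parts and OR-ing the partial results, since the wide values
// are equal exactly when every narrow part is. Ordered predicates instead
// chain part-wise compares from the lowest part to the highest, threading
// the intermediate result through each step.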
  case TargetOpcode::G_FCMP:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // ...
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();
    // ...
    MO1.setReg(TruncMIB.getReg(0));
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;
    // ...
    for (int i = 0; i < NumParts; ++i) {
      // ...
    }
    // ...
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
        // Whole part is below the extension point: pass it through.
        DstRegs.push_back(SrcRegs[i]);
        PartialExtensionReg = DstRegs.back();
      } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(TargetOpcode::G_SEXT_INREG, {NarrowTy},
                            {SrcRegs[i], SizeInBits % NarrowSize})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
                 MIRBuilder, MRI);

    // Apply the operation to each part, then re-merge in reversed order.
    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }
    // ...
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_FPTOSI_SAT:
    return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_FPEXT:
    // ...
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP:
    return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_VSCALE: {
    // Emit a narrow vscale(1), zero-extend it, and multiply by the original
    // constant factor.
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  // ...
}

void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  // ...
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  // ...
  auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
  if (DstTy.isVector())
    return UnableToLegalize;

  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = Src1Ty.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;
      // ...
      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
          MRI.createGenericVirtualRegister(WideTy);
      // ... zext the source, shift it by Offset, and OR it into NextResult.
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  const int GCD = std::gcd(SrcSize, WideSize);
  // ...
  if (GCD == SrcSize) {
    // No unmerge needed; the sources are already GCD-sized pieces.
    // ...
  } else {
    // ...
    for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
      Unmerges.push_back(Unmerge.getReg(J));
  }

  // Pad with undef to the next multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    // ...
  }
  // ...
  MI.eraseFromParent();
  return Legalized;
}
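// Worked example (illustrative): for G_MERGE_VALUES of two s16 sources into
// an s32 result widened to s64, each source is zero-extended to s64, the
// second is shifted left by PartSize = 16 and OR'd in, and the packed s64
// value is truncated back down to the s32 destination.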
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  // ...
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  // ...
  Register Dst0Reg = MI.getOperand(0).getReg();
  // ...
    LLVM_DEBUG(
        dbgs() << "Not casting non-integral address space integer\n");
  // ...
  // Widened source can be handled with a single wide unmerge: take the first
  // def directly and extract the remaining pieces.
  for (int I = 1; I != NumDst; ++I) {
    // ...
  }
  MI.eraseFromParent();
  return Legalized;
  // ...
  LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
  // ...
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  // ...
  if (PartsPerRemerge == 1) {
    // ...
    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        // ...
        MIB.addDef(MI.getOperand(Idx).getReg());
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }
      // ...
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;
    // ...
    if (DstTy.isPointer())
      return UnableToLegalize;
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  // ...
  // Shift the relevant bits into the low part of the wide source.
  LLT ShiftTy = SrcTy;
  // ...
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                   LLT WideTy) {
  if (TypeIdx != 0 || WideTy.isVector())
    return UnableToLegalize;
  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  widenScalarDst(MI, WideTy);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
                                           LLT WideTy) {
  unsigned Opcode;
  unsigned ExtOpcode;
  std::optional<Register> CarryIn;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode!");
  case TargetOpcode::G_SADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_SSUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_SEXT;
    break;
  case TargetOpcode::G_UADDO:
    Opcode = TargetOpcode::G_ADD;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_USUBO:
    Opcode = TargetOpcode::G_SUB;
    ExtOpcode = TargetOpcode::G_ZEXT;
    break;
  case TargetOpcode::G_SADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_SSUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_SEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_UADDE:
    Opcode = TargetOpcode::G_UADDE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  case TargetOpcode::G_USUBE:
    Opcode = TargetOpcode::G_USUBE;
    ExtOpcode = TargetOpcode::G_ZEXT;
    CarryIn = MI.getOperand(4).getReg();
    break;
  }
  // ...
  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
  // ...
  auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy, CarryOutTy},
                                     {LHSExt, RHSExt, *CarryIn});
  // ...
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
                                         LLT WideTy) {
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
                  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
                 MI.getOpcode() == TargetOpcode::G_USHLSAT;
  // ...
  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
                                        {ShiftL, ShiftR}, MI.getFlags());
  // ...
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
                                 LLT WideTy) {
  // ...
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
  // ...
  Register OriginalOverflow = MI.getOperand(1).getReg();
  LLT OverflowTy = MRI.getType(OriginalOverflow);
  // ...
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  // ...
  unsigned MulOpc =
      WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;

  MachineInstrBuilder Mulo;
  if (WideMulCanOverflow)
    Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
                                 {LeftOperand, RightOperand});
  // ...
  if (WideMulCanOverflow) {
    // ...
  }
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_ATOMICRMW_XCHG:
  case TargetOpcode::G_ATOMICRMW_ADD:
  case TargetOpcode::G_ATOMICRMW_SUB:
  case TargetOpcode::G_ATOMICRMW_AND:
  case TargetOpcode::G_ATOMICRMW_OR:
  case TargetOpcode::G_ATOMICRMW_XOR:
  case TargetOpcode::G_ATOMICRMW_MIN:
  case TargetOpcode::G_ATOMICRMW_MAX:
  case TargetOpcode::G_ATOMICRMW_UMIN:
  case TargetOpcode::G_ATOMICRMW_UMAX:
    assert(TypeIdx == 0 && "atomicrmw with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG:
    assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
    if (TypeIdx == 0) {
      // ...
    }
    assert(TypeIdx == 1 &&
           "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
    // ...
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO:
    return widenScalarMulo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    // ...
    unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
                              Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
                          ? TargetOpcode::G_ANYEXT
                          : TargetOpcode::G_ZEXT;
    // ...
    unsigned NewOpc = Opcode;
    if (NewOpc == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the source register.
      // ...
      NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
    }
    // ...
    if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // ...
    }
    if (Opcode == TargetOpcode::G_CTLZ) {
      // ...
    }
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    // ...
    MI.getOperand(0).setReg(DstExt);
    // ...
  }
  case TargetOpcode::G_BITREVERSE: {
    // ...
    MI.getOperand(0).setReg(DstExt);
    // ...
  }
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
    // ...
  case TargetOpcode::G_ABS:
    // ...
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SHUFFLE_VECTOR:
    // ...
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX:
    // ...
  case TargetOpcode::G_SHL:
    // ...
  case TargetOpcode::G_ROTR:
  case TargetOpcode::G_ROTL:
    // ...
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // ...
  case TargetOpcode::G_SDIVREM:
    // ...
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // ...
    unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
                                                    : TargetOpcode::G_ZEXT;
    // ...
  }
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
    // ...
  case TargetOpcode::G_UDIVREM:
    // ...
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX: {
    // ...
    unsigned ExtOpc = /* isSExtCheaperThanZExt */
        // ...
            ? TargetOpcode::G_SEXT
            : TargetOpcode::G_ZEXT;
    // ...
  }
  case TargetOpcode::G_SELECT:
    // ...
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT:
  case TargetOpcode::G_IS_FPCLASS:
    // ...
  case TargetOpcode::G_SITOFP:
    // ...
  case TargetOpcode::G_UITOFP:
    // ...
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT: {
    // ...
    MI.getOperand(0).setReg(ExtReg);
    // ...
    if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
      // ...
    }
    // ...
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    // ...
  case TargetOpcode::G_STORE: {
    // ...
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
        TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    // ...
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits())
                           : SrcMO.getCImm()->getValue().zext(WideTy.getSizeInBits());
    // ...
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));
    // ...
  }
  case TargetOpcode::G_FCONSTANT: {
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    // ...
  }
  case TargetOpcode::G_BRCOND:
    // ...
  case TargetOpcode::G_FCMP:
    // ...
  case TargetOpcode::G_ICMP: {
    // ...
    unsigned ExtOpcode = CmpInst::isSigned(Pred) /* ... */
                             ? TargetOpcode::G_SEXT
                             : TargetOpcode::G_ZEXT;
    // ...
  }
  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    // ...
  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      // Widen each incoming value at the end of its predecessor block.
      // ...
    }
    // ...
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    // ...
    widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
    // ...
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    // ...
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FTAN:
  case TargetOpcode::G_FACOS:
  case TargetOpcode::G_FASIN:
  case TargetOpcode::G_FATAN:
  case TargetOpcode::G_FATAN2:
  case TargetOpcode::G_FCOSH:
  case TargetOpcode::G_FSINH:
  case TargetOpcode::G_FTANH:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI:
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    if (TypeIdx == 0) {
      if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
        return UnableToLegalize;
      // ...
    }
    // ...
  }
  case TargetOpcode::G_FFREXP: {
    // ...
  }
  case TargetOpcode::G_INTTOPTR:
    // ...
  case TargetOpcode::G_PTRTOINT:
    // ...
  case TargetOpcode::G_BUILD_VECTOR: {
    // ...
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    // ...
  }
  case TargetOpcode::G_SEXT_INREG:
    // ...
  case TargetOpcode::G_PTRMASK: {
    // ...
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
    // ...
  }
  case TargetOpcode::G_VSCALE: {
    MachineOperand &SrcMO = MI.getOperand(1);
    // ...
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));
    // ...
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    // ...
  }
  case TargetOpcode::G_INSERT_SUBVECTOR: {
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  }
}
static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
                             MachineIRBuilder &B, Register Src, LLT Ty) {
  auto Unmerge = B.buildUnmerge(Ty, Src);
  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
    Pieces.push_back(Unmerge.getReg(I));
}

// ...
  unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
  // ...
  LLT DstLLT = MRI.getType(DstReg);
  // ...
  MI.eraseFromParent();
  return Legalized;
}

// ...
  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitcast(MachineInstr &MI) {
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy.isVector()) {
    LLT SrcEltTy = SrcTy.getElementType();
    // ...
    if (DstTy.isVector()) {
      int NumDstElt = DstTy.getNumElements();
      int NumSrcElt = SrcTy.getNumElements();

      LLT DstEltTy = DstTy.getElementType();
      LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
      LLT SrcPartTy = SrcEltTy; // Original unmerge result type

      if (NumSrcElt < NumDstElt) {
        // Each wide source element is split into several destination pieces.
        // ...
        SrcPartTy = SrcEltTy;
      } else if (NumSrcElt > NumDstElt) {
        // Several narrow source elements combine into one destination piece.
        // ...
        DstCastTy = DstEltTy;
      }
      // ...
      MI.eraseFromParent();
      return Legalized;
    }
    // ...
  }

  if (DstTy.isVector()) {
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  // ...
}
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
                                                   Register Idx,
                                                   unsigned NewEltSize,
                                                   unsigned OldEltSize) {
  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
  LLT IdxTy = B.getMRI()->getType(Idx);

  // Figure out the bit offset of the narrow element within the wide element.
  auto OffsetMask = B.buildConstant(
      IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
  return B.buildShl(IdxTy, OffsetIdx,
                    B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
}
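// Worked example (illustrative): reading s8 element 5 out of a vector that
// was bitcast to s32 elements gives Log2EltRatio = 2, so the wide element
// index is 5 >> 2 = 1 and the offset above is (5 & 3) << 3 = 8, i.e. bits
// [8,16) of wide element 1 hold the requested narrow element.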
  auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
  // ...
  unsigned OldNumElts = SrcVecTy.getNumElements();
  // ...
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size: extract the pieces of the wide
    // element and rebuild the narrow one.
    // ...
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    // ...
    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      // ... extract each sub-element of the selected wide element.
      NewOps[I] = Elt.getReg(0);
    }
    // ...
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;
    // ...
    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    // ... extract the wide element and shift the requested bits down.
    MI.eraseFromParent();
    return Legalized;
  }
  // ...
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg,
                                    Register InsertReg, Register OffsetBits) {
  LLT TargetTy = B.getMRI()->getType(TargetReg);
  LLT InsertTy = B.getMRI()->getType(InsertReg);
  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);

  // Produce a bitmask of the value to insert.
  auto EltMask = B.buildConstant(
      TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
                                     InsertTy.getSizeInBits()));
  // Shift it into position.
  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);

  // Clear out the bits in the wide element.
  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);

  // The value to insert has all zeros already, so stick it into the masked
  // wide element.
  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
}
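// Note (added for exposition): this is the classic read-modify-write bitfield
// update. For a 16-bit insert into an s64 element at OffsetBits = 16 it
// computes (Wide & ~(0xFFFF << 16)) | (zext(Val) << 16), so only bits
// [16,32) of the wide element change.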
  auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
      MI.getFirst4RegLLTs();
  // ...
  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;
    // ...
    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    // ... extract the wide element, insert the narrow value as a bitfield,
    // and write the wide element back:
    Register InsertedVec = MIRBuilder
                               .buildInsertVectorElement(
                                   CastTy, CastVec, InsertedElt, ScaledIdx)
                               .getReg(0);
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  // ...

LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT CastTy) {
  auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
  if (!ConcatMI)
    return UnableToLegalize;

  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // ...
  if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
    return UnableToLegalize;
  }

  SmallVector<Register> SrcScalars;
  for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
    Register Scalar = MIRBuilder.buildBitcast(SrcScalTy,
                                              ConcatMI->getSourceReg(i))
                          .getReg(0);
    SrcScalars.push_back(Scalar);
  }
  // ...
  MI.eraseFromParent();
  return Legalized;
}
  auto ShuffleMI = cast<GShuffleVector>(&MI);
  LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
  LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
  // ...
  MI.eraseFromParent();
  return Legalized;
  auto ES = cast<GExtractSubvector>(&MI);
  // ...
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  // ...
  if (DstTy == CastTy)
    return Legalized;
  // ...
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      SrcTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;
  auto ES = cast<GInsertSubvector>(&MI);
  // ...
  LLT DstTy = MRI.getType(Dst);
  LLT BigVecTy = MRI.getType(BigVec);
  LLT SubVecTy = MRI.getType(SubVec);

  if (DstTy == CastTy)
    return Legalized;
  // ...
  if (CastEltSize < DstEltSize)
    return UnableToLegalize;

  auto AdjustAmt = CastEltSize / DstEltSize;
  if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
      BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
    return UnableToLegalize;
  if (MemSizeInBits != MemStoreSizeInBits) {
    // Promote to a byte-sized load if not loading an integral number of
    // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
    // ...
    if (isa<GSExtLoad>(LoadMI)) {
      // ...
    } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
      // ...
    }
    // ...
    if (DstTy != LoadTy)
      MIRBuilder.buildTrunc(DstReg, LoadReg);
    // ...
  }
  // ...
  // Split the load into two power-of-2 sized pieces.
  uint64_t LargeSplitSize, SmallSplitSize;
  if (!isPowerOf2_32(MemSizeInBits)) {
    LargeSplitSize = llvm::bit_floor(MemSizeInBits);
    SmallSplitSize = MemSizeInBits - LargeSplitSize;
  } else {
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }
  // ...
  MachineMemOperand *SmallMMO =
      MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  // ...
  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
                                             SmallPtr, *SmallMMO);
  // ...
  if (AnyExtTy == DstTy)
  if (StoreWidth != StoreSizeInBits) {
    // Promote to a byte-sized store with upper bits zero if not storing an
    // integral number of bytes.
    // ...
  }
  // ...
  uint64_t LargeSplitSize, SmallSplitSize;
  if (!isPowerOf2_32(MemTy.getSizeInBits())) {
    LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
    SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
  } else {
    // ...
    SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
  }
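// Worked example (illustrative): an s48 access the target cannot perform
// natively is split as LargeSplitSize = bit_floor(48) = 32 and
// SmallSplitSize = 16, i.e. one 32-bit access at offset 0 plus a 16-bit
// access at byte offset 4, recombined with shift and OR for loads.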
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD: {
    // ...
  }
  case TargetOpcode::G_STORE: {
    // ...
  }
  case TargetOpcode::G_SELECT: {
    // ...
      LLVM_DEBUG(
          dbgs() << "bitcast action not implemented for vector select\n");
    // ...
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // ...
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
  case TargetOpcode::G_CONCAT_VECTORS:
    return bitcastConcatVector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return bitcastShuffleVector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_EXTRACT_SUBVECTOR:
    return bitcastExtractSubvector(MI, TypeIdx, CastTy);
  case TargetOpcode::G_INSERT_SUBVECTOR:
    return bitcastInsertSubvector(MI, TypeIdx, CastTy);
  default:
    return UnableToLegalize;
  }
void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
  Observer.changingInstr(MI);
  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
  Observer.changedInstr(MI);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
  using namespace TargetOpcode;

  switch(MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_FCONSTANT:
    return lowerFConstant(MI);
  case TargetOpcode::G_BITCAST:
    return lowerBitcast(MI);
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM: {
    // Lower the remainder as LHS - (LHS / RHS) * RHS.
    auto Quot =
        MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
                              {MI.getOperand(1), MI.getOperand(2)});
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
    return lowerSADDO_SSUBO(MI);
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SMULH:
    return lowerSMULH_UMULH(MI);
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for
    // the result.
    auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
    LLT Ty = MRI.getType(Res);

    unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
                          ? TargetOpcode::G_SMULH
                          : TargetOpcode::G_UMULH;

    Observer.changingInstr(MI);
    const auto &TII = MIRBuilder.getTII();
    MI.setDesc(TII.get(TargetOpcode::G_MUL));
    MI.removeOperand(1);
    Observer.changedInstr(MI);
    // ...
    // For signed multiply, overflow occurs when the high half is not the
    // sign-extension of the low half.
    if (Opcode == TargetOpcode::G_SMULH) {
      // ...
    }
    // ...
  }
  case TargetOpcode::G_FNEG: {
    auto [Res, SubByReg] = MI.getFirst2Regs();
    // Lower FNEG by flipping the sign bit with an XOR.
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    auto [Res, LHS, RHS] = MI.getFirst3Regs();
    // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
    // ...
    if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
      MIRBuilder.buildStrictFAdd(Res, LHS, Neg);
    else
      MIRBuilder.buildFAdd(Res, LHS, Neg);

    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_FMAD:
    return lowerFMad(MI);
  case TargetOpcode::G_FFLOOR:
    return lowerFFloor(MI);
  case TargetOpcode::G_LROUND:
  case TargetOpcode::G_LLROUND: {
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_INTRINSIC_ROUND:
    return lowerIntrinsicRound(MI);
  case TargetOpcode::G_FRINT: {
    // Since round-to-even is the assumed rounding mode for unconstrained FP
    // operations, rint and roundeven are the same operation.
    changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
    return Legalized;
  }
  case TargetOpcode::G_INTRINSIC_LRINT:
  case TargetOpcode::G_INTRINSIC_LLRINT: {
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
    auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
    // ...
    MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
                                  **MI.memoperands_begin());
    // ... compare the loaded value against CmpVal to produce the success bit.
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    return lowerLoad(cast<GAnyLoad>(MI));
  case TargetOpcode::G_STORE:
    return lowerStore(cast<GStore>(MI));
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTPOP:
    return lowerBitCount(MI);
  case G_UADDO: {
    auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
    // Add, then detect carry by comparing the result against an operand.
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case G_UADDE: {
    auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBO: {
    auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case G_USUBE: {
    auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case G_MERGE_VALUES:
    return lowerMergeValues(MI);
  case G_UNMERGE_VALUES:
    return lowerUnmergeValues(MI);
  case TargetOpcode::G_SEXT_INREG: {
    assert(MI.getOperand(2).isImm() && "Expected immediate");
    int64_t SizeInBits = MI.getOperand(2).getImm();

    auto [DstReg, SrcReg] = MI.getFirst2Regs();
    // Lower as a left shift followed by an arithmetic right shift.
    // ...
    MI.eraseFromParent();
    return Legalized;
  }
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return lowerExtractInsertVectorElt(MI);
  case G_SHUFFLE_VECTOR:
    return lowerShuffleVector(MI);
  case G_VECTOR_COMPRESS:
    return lowerVECTOR_COMPRESS(MI);
  case G_DYN_STACKALLOC:
    return lowerDynStackAlloc(MI);
  case G_STACKSAVE:
    return lowerStackSave(MI);
  case G_STACKRESTORE:
    return lowerStackRestore(MI);
  case G_READ_REGISTER:
  case G_WRITE_REGISTER:
    return lowerReadWriteRegister(MI);
  // ...
  case G_MEMCPY_INLINE:
    return lowerMemcpyInline(MI);
  unsigned AddrSpace = DL.getAllocaAddrSpace();
  // ...

// Clamp a vector index so that out-of-range accesses stay in bounds.
  LLT IdxTy = B.getMRI()->getType(IdxReg);
  // ...
  if (isPowerOf2_32(NElts)) {
    APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
    return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
  }

  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
      .getReg(0);

// ...
  assert(EltSize * 8 == EltTy.getSizeInBits() &&
         "Converting bits to bytes lost precision");
  // ...
  unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
  // ...
  if (IdxTy != MRI.getType(Index))
    Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
static bool hasSameNumEltsOnAllVectorOperands(
    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
    std::initializer_list<unsigned> NonVecOpIndices) {
  if (MI.getNumMemOperands() != 0)
    return false;

  LLT VecTy = MRI.getType(MI.getReg(0));
  // ...
  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
    // ...
  }
  // ...
}

// ...
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover) =
      getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);

  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
  for (int i = 0; i < NumParts; ++i) {
    // ...
  }
  // ...
  assert(NumLeftover == 1 && "expected exactly one leftover");

// ...
  for (unsigned i = 0; i < N; ++i) {
    if (Op.isReg())
      // ...
    else if (Op.isImm())
      // ...
    else if (Op.isPredicate())
      // ...
  }
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
    GenericMachineInstr &MI, unsigned NumElts,
    std::initializer_list<unsigned> NonVecOpIndices) {
  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
         "Non-compatible opcode or not specified non-vector operands");
  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();

  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();

  // Create output pieces (sub-vectors with NumElts elements + one leftover)
  // for each def.
  // ...
  for (unsigned i = 0; i < NumDefs; ++i) {
    // ...
  }

  // Split vector input operands into sub-vectors with NumElts elements plus
  // the leftover.
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       ++UseIdx, ++UseNo) {
    if (!MI.getOperand(UseIdx).isReg()) {
      // Non-vector operands are replicated for every piece.
      InputOpsPieces[UseNo].push_back(MI.getOperand(UseIdx));
    } else {
      SmallVector<Register, 8> SplitPieces;
      extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
                         MRI);
      for (auto Reg : SplitPieces)
        InputOpsPieces[UseNo].push_back(Reg);
    }
  }

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;

  // Take the i-th piece of each input operand and build the instruction.
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    SmallVector<DstOp, 2> Defs;
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      Defs.push_back(OutputOpsPieces[DstNo][i]);

    SmallVector<SrcOp, 3> Uses;
    for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
      Uses.push_back(InputOpsPieces[InputNo][i]);

    auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
    for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
      OutputRegs[DstNo].push_back(I.getReg(DstNo));
  }

  // Merge the small outputs into MI's def registers.
  if (NumLeftovers) {
    for (unsigned i = 0; i < NumDefs; ++i)
      mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
  } else {
    for (unsigned i = 0; i < NumDefs; ++i)
      MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
  }

  MI.eraseFromParent();
  return Legalized;
}
  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
  unsigned NumDefs = MI.getNumDefs();
  // ...
  // Split each PHI input at the end of its predecessor block.
  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
       UseIdx += 2, ++UseNo) {
    // ...
  }

  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
    // Build one narrow PHI per piece.
    // ...
    for (unsigned j = 0; j < NumInputs / 2; ++j) {
      Phi.addUse(InputOpsPieces[j][i]);
      Phi.add(MI.getOperand(1 + j * 2 + 1));
    }
  }
  // ...
  if (NumLeftovers) {
    mergeMixedSubvectors(MI.getReg(0), OutputRegs);
  } else {
    // ...
  }

  MI.eraseFromParent();
  return Legalized;
  const int NumDst = MI.getNumOperands() - 1;
  const Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  if (TypeIdx != 1 || NarrowTy == DstTy)
    return UnableToLegalize;
  // ...
  const int PartsPerUnmerge = NumDst / NumUnmerge;

  for (int I = 0; I != NumUnmerge; ++I) {
    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

    for (int J = 0; J != PartsPerUnmerge; ++J)
      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
    MIB.addUse(Unmerge.getReg(I));
  }

  MI.eraseFromParent();
  return Legalized;
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  // ...
  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
  // ...
  if (NarrowTy == SrcTy)
    return UnableToLegalize;
  // ...
  if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
      (NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0))
    return UnableToLegalize;

  // This is most likely a G_CONCAT_VECTORS: unmerge every source down to
  // elements and regroup them into NarrowTy-sized pieces.
  SmallVector<Register, 8> Elts;
  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
    auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
    for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
      Elts.push_back(Unmerge.getReg(j));
  }
  // ...
  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
       ++i, Offset += NumNarrowTyElts) {
    ArrayRef<Register> Pieces(Elts.data() + Offset, NumNarrowTyElts);
    NarrowTyElts.push_back(
        MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
  }
  // ...
  MI.eraseFromParent();
  return Legalized;

  // G_BUILD_VECTOR path:
  assert(TypeIdx == 0 && "Bad type index");
  // ...
  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
  unsigned NumElts = NarrowTy.getNumElements();
  for (unsigned i = 0; i < NumParts; ++i) {
    SmallVector<Register, 8> Sources;
    for (unsigned j = 0; j < NumElts; ++j)
      Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
    NarrowTyElts.push_back(
        MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
  }
  // ...
  MI.eraseFromParent();
  return Legalized;
  auto [DstReg, SrcVec] = MI.getFirst2Regs();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();
  // ...
  // If the index is a constant, break the operation down into pieces and
  // index into the correct one.
  int64_t IdxVal;
  if (auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI)) {
    IdxVal = MaybeCst->Value.getSExtValue();
    // Avoid out-of-bounds indexing the pieces.
    // ...
    MI.eraseFromParent();
    return Legalized;
  }

  SmallVector<Register, 8> VecParts;
  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

  // Build a sequence of NarrowTy pieces in VecParts for this operand.
  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                  TargetOpcode::G_ANYEXT);
  // ...
  int64_t PartIdx = IdxVal / NewNumElts;
  // ...
  if (IsInsert) {
    // Use the adjusted index to insert into one of the subvectors.
    auto InsertPart = MIRBuilder.buildInsertVectorElement(
        PartTy, VecParts[PartIdx], InsertVal, NewIdx);
    VecParts[PartIdx] = InsertPart.getReg(0);

    // Recombine the inserted subvector with the others to reform the result
    // vector.
    buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
  } else {
    // ...
  }

  MI.eraseFromParent();
  return Legalized;
  bool IsLoad = isa<GLoad>(LdStMI);
  // ...
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    // ...
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs, MIRBuilder, MRI)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }
  // ...
  auto MMO = LdStMI.getMMO();
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned NumParts, unsigned Offset) -> unsigned {
    // ...
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         ++Idx) {
      unsigned ByteOffset = Offset / 8;
      // ...
      if (IsLoad) {
        // ...
        ValRegs.push_back(Dst);
      }
      // ...
    }
    return Offset;
  };

  unsigned HandledOffset =
      splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);

  if (IsLoad)
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;
  // ...
  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    // ...
  case G_FCANONICALIZE:
    // ...
  case G_INTRINSIC_LRINT:
  case G_INTRINSIC_LLRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
    // ...
  case G_INTRINSIC_TRUNC:
    // ...
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
    // ...
  case G_CTLZ_ZERO_UNDEF:
    // ...
  case G_CTTZ_ZERO_UNDEF:
    // ...
  case G_ADDRSPACE_CAST:
    // ...
  case G_STRICT_FLDEXP:
    // ...
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    // ...
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  // ...
  case TargetOpcode::G_VECREDUCE_SEQ_FADD:
  case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
    return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
  case G_SHUFFLE_VECTOR:
    return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
  case G_INTRINSIC_FPTRUNC_ROUND:
    // ...
  }
  assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
         "Not a bitcast operation");
  // ...
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();

  unsigned NewElemCount =
      NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
  // ...
  if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
    return UnableToLegalize;

  // Bitcast the pieces and merge them back together.
  SmallVector<Register> BitcastVRegs;
  for (unsigned i = 0; i < SrcVRegs.size(); i++)
    BitcastVRegs.push_back(
        MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
  // ...
  MI.eraseFromParent();
  return Legalized;
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  // ...
  auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
      MI.getFirst3RegLLTs();
  // The shuffle should be canonicalized by now.
  if (DstTy != Src1Ty)
    return UnableToLegalize;
  if (DstTy != Src2Ty)
    return UnableToLegalize;
  // ...
  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
                        SplitSrc2Regs[1]};
  // ...
    unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
    unsigned FirstMaskIdx = High * NewElts;
    bool UseBuildVector = false;
    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
      // The mask element. This indexes into the input.
      int Idx = Mask[FirstMaskIdx + MaskOffset];

      // The input vector this mask element indexes into.
      unsigned Input = (unsigned)Idx / NewElts;

      if (Input >= std::size(Inputs)) {
        // The mask element does not index into any input vector.
        Ops.push_back(-1);
        continue;
      }

      // Turn the index into an offset from the start of the input vector.
      Idx -= Input * NewElts;

      // Find or create a shuffle vector operand to hold this input.
      unsigned OpNo;
      for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
        if (InputUsed[OpNo] == Input) {
          // This input vector is already an operand.
          break;
        } else if (InputUsed[OpNo] == -1U) {
          // Create a new operand for this input vector.
          InputUsed[OpNo] = Input;
          break;
        }
      }

      if (OpNo >= std::size(InputUsed)) {
        // More than two input vectors used! Give up on trying to create a
        // shuffle vector; insert all elements into a BUILD_VECTOR instead.
        UseBuildVector = true;
        break;
      }
      // ...
    }

    if (UseBuildVector) {
      // Extract the input elements by hand.
      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
        int Idx = Mask[FirstMaskIdx + MaskOffset];
        // ...
        if (Input >= std::size(Inputs)) {
          // The mask element is "undef" or indexes off the end of the input.
          SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
          continue;
        }

        // Turn the index into an offset from the start of the input vector.
        Idx -= Input * NewElts;

        SVOps.push_back(MIRBuilder
                            .buildExtractVectorElement(
                                EltTy, Inputs[Input],
                                MIRBuilder.buildConstant(LLT::scalar(32), Idx))
                            .getReg(0));
      }
      // ...
    } else if (InputUsed[0] == -1U) {
      // No input vectors were used; the result is undefined.
      // ...
    } else {
      Register Op0 = Inputs[InputUsed[0]];
      // If only one input was used, use undef for the second operand.
      Register Op1 = InputUsed[1] == -1U
                         ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
                         : Inputs[InputUsed[1]];
      // ...
    }
  // ...
  MI.eraseFromParent();
  return Legalized;
  auto &RdxMI = cast<GVecReduce>(MI);
  // ...
  auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
  // ...
  unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
  // ...
  const unsigned NumParts =
      SrcTy.getNumElements() / NarrowTy.getNumElements();
  // ...
  if (DstTy != NarrowTy)
    return UnableToLegalize;
  // ...
  // Tree-reduce the split sources pairwise until one value remains.
  unsigned NumPartsLeft = NumParts;
  while (NumPartsLeft > 1) {
    for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
      PartialResults.push_back(
          MIRBuilder
              .buildInstr(ScalarOpc, {NarrowTy},
                          {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
              .getReg(0));
    }
    SplitSrcs = PartialResults;
    PartialResults.clear();
    NumPartsLeft = SplitSrcs.size();
  }
  // ...
  MI.eraseFromParent();
  return Legalized;
  // ...
  for (unsigned Idx = 1; Idx < NumParts; ++Idx)
    // ...
  MI.eraseFromParent();
  return Legalized;
  // ...
  for (unsigned Part = 0; Part < NumParts; ++Part) {
    // ...
  }
  // ...
  return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
  // ...
  Register Acc = PartialReductions[0];
  for (unsigned Part = 1; Part < NumParts; ++Part) {
    if (Part == NumParts - 1) {
      MIRBuilder.buildInstr(ScalarOpc, {DstReg},
                            {Acc, PartialReductions[Part]});
    } else {
      Acc = MIRBuilder
                .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
                .getReg(0);
    }
  }
  MI.eraseFromParent();
  return Legalized;
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
                                                  unsigned int TypeIdx,
                                                  LLT NarrowTy) {
  auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
      MI.getFirst3RegLLTs();
  if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
      DstTy != NarrowTy)
    return UnableToLegalize;

  assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
          MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
         "Unexpected vecreduce opcode");
  unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
                           ? TargetOpcode::G_FADD
                           : TargetOpcode::G_FMUL;
  // Sequential reductions must keep the operation order, so fold each
  // element into the accumulator one at a time.
  // ...
  for (unsigned i = 0; i < NumParts; i++)
    Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
              .getReg(0);
  // ...
  MI.eraseFromParent();
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
                                        LLT SrcTy, LLT NarrowTy,
                                        unsigned ScalarOpc) {
  // ...
  // Tree-reduce using vector operations until one NarrowTy value is left.
  while (SplitSrcs.size() > 1) {
    SmallVector<Register> PartialRdxs;
    for (unsigned Idx = 0; Idx < SplitSrcs.size() - 1; Idx += 2) {
      // ...
    }
    SplitSrcs = std::move(PartialRdxs);
  }
  // Finally generate the requested NarrowTy based reduction.
  Observer.changingInstr(MI);
  MI.getOperand(1).setReg(SplitSrcs[0]);
  Observer.changedInstr(MI);
  return Legalized;
}
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy,
                                             const LLT AmtTy) {
  // ...
  if (Amt.isZero()) {
    MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // ...
    } else if (Amt == NVTBits) {
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // ...
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // ...
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // ...
    }
  } else {
    if (Amt.ugt(VTBits)) {
      // Shift by more than the type width: both halves become copies of the
      // sign bit.
      // ...
    } else if (Amt.ugt(NVTBits)) {
      // ...
    } else if (Amt == NVTBits) {
      // ...
    } else {
      // ...
    }
  }

  MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();
  return Legalized;
}
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;
  // ...
  const unsigned NewBitSize = DstEltSize / 2;
  // ...
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // ...
    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // ...
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      // ...
    } else {
      // ...
    }
    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }
  // ...
  MI.eraseFromParent();
  return Legalized;
  assert(TypeIdx == 0 && "Expecting only Idx 0");
  // ...
  // Widen each incoming value at the end of its predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    // ...
  }
  assert(Ty.isScalar() && "Expected scalar type to make neutral element for");

  switch (Opcode) {
  default:
    llvm_unreachable(
        "getNeutralElementForVecReduce called with invalid opcode!");
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_UMAX:
    return MIRBuilder.buildConstant(Ty, 0).getReg(0);
  case TargetOpcode::G_VECREDUCE_MUL:
    return MIRBuilder.buildConstant(Ty, 1).getReg(0);
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_UMIN:
    return MIRBuilder.buildConstant(Ty, APInt::getAllOnes(Ty.getSizeInBits()))
        .getReg(0);
  case TargetOpcode::G_VECREDUCE_SMAX:
    return MIRBuilder
        .buildConstant(Ty, APInt::getSignedMinValue(Ty.getSizeInBits()))
        .getReg(0);
  case TargetOpcode::G_VECREDUCE_SMIN:
    return MIRBuilder
        .buildConstant(Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()))
        .getReg(0);
  case TargetOpcode::G_VECREDUCE_FADD:
    return MIRBuilder.buildFConstant(Ty, -0.0).getReg(0);
  case TargetOpcode::G_VECREDUCE_FMUL:
    return MIRBuilder.buildFConstant(Ty, 1.0).getReg(0);
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
    assert(false && "getNeutralElementForVecReduce unimplemented for "
                    "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
  }
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // ...
  }
  case TargetOpcode::G_STORE:
    // ...
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FCOPYSIGN:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_STRICT_FSUB:
  case TargetOpcode::G_STRICT_FMUL:
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // ...
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FSHR:
  case TargetOpcode::G_FSHL: {
    // ...
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_EXTRACT:
    if (TypeIdx != 1)
      return UnableToLegalize;
    // ...
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_INSERT_VECTOR_ELT:
  case TargetOpcode::G_FREEZE:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_SEXT_INREG:
  case TargetOpcode::G_ABS:
    // ...
  case TargetOpcode::G_SELECT: {
    auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
    if (TypeIdx == 1) {
      if (!CondTy.isScalar() ||
          DstTy.getElementCount() != MoreTy.getElementCount())
        return UnableToLegalize;

      // This is turning a scalar select of vectors into a vector select.
      // Broadcast the select condition.
      auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ShufSplat.getReg(0));
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (CondTy.isVector())
      return UnableToLegalize;
    // ...
  }
  case TargetOpcode::G_UNMERGE_VALUES:
    return UnableToLegalize;
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_SHUFFLE_VECTOR:
    return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
  case TargetOpcode::G_BUILD_VECTOR: {
    SmallVector<SrcOp, 8> Elts;
    for (auto Op : MI.uses()) {
      Elts.push_back(Op.getReg());
    }
    // ... pad with undef elements up to MoreTy's element count.
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FPTRUNC:
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI_SAT:
  case TargetOpcode::G_FPTOUI_SAT:
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // ...
  }
  case TargetOpcode::G_ICMP:
  case TargetOpcode::G_FCMP: {
    // ...
  }
  case TargetOpcode::G_BITCAST: {
    // ...
  }
  case TargetOpcode::G_VECREDUCE_FADD:
  case TargetOpcode::G_VECREDUCE_FMUL:
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_MUL:
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_SMAX:
  case TargetOpcode::G_VECREDUCE_SMIN:
  case TargetOpcode::G_VECREDUCE_UMAX:
  case TargetOpcode::G_VECREDUCE_UMIN: {
    // Pad the source vector with the reduction's neutral element so the
    // extra lanes do not change the result.
    // ...
    auto NeutralElement = getNeutralElementForVecReduce(
        MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
    // ...
    for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
         i != e; i++)
      NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
                                                   NeutralElement, Idx);
    // ...
    MO.setReg(NewVec.getReg(0));
    // ...
  }
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  unsigned MaskNumElts = Mask.size();
  unsigned SrcNumElts = SrcTy.getNumElements();
  // ...
  if (MaskNumElts == SrcNumElts)
    return Legalized;

  if (MaskNumElts < SrcNumElts) {
    // Extend the mask to the source length with undef (-1) entries.
    // ...
    MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
                                  MI.getOperand(1).getReg(),
                                  MI.getOperand(2).getReg(), NewMask);
    MI.eraseFromParent();
    return Legalized;
  }

  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
  // ...
  MOps1[0] = MI.getOperand(1).getReg();
  MOps2[0] = MI.getOperand(2).getReg();
  // ...
  // Readjust the mask for the new input vector length.
  for (unsigned I = 0; I != MaskNumElts; ++I) {
    int Idx = Mask[I];
    if (Idx >= static_cast<int>(SrcNumElts))
      Idx += PaddedMaskNumElts - SrcNumElts;
    MappedOps[I] = Idx;
  }
  // ...
  // If we got more elements than required, extract a subvector.
  if (MaskNumElts != PaddedMaskNumElts) {
    // ...
    for (unsigned I = 0; I < MaskNumElts; ++I) {
      // ...
    }
    // ...
  }

  MI.eraseFromParent();
  return Legalized;
                                                  unsigned int TypeIdx,
                                                  LLT MoreTy) {
  auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
  unsigned NumElts = DstTy.getNumElements();
  if (DstTy.isVector() && Src1Ty.isVector() &&
      DstTy.getNumElements() != Src1Ty.getNumElements()) {
  if (DstTy != Src1Ty || DstTy != Src2Ty)
  for (unsigned I = 0; I != NumElts; ++I) {
    if (Idx < static_cast<int>(NumElts))
      NewMask[I] = Idx - NumElts + WidenNumElts;
                           MI.getOperand(1).getReg(),
                           MI.getOperand(2).getReg(), NewMask);
  MI.eraseFromParent();
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();
  unsigned DstIdx = 0;
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;
  unsigned CarrySumPrevDstIdx;
  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
    if (DstIdx != DstParts - 1) {
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
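  // The loop above is schoolbook long multiplication in base 2^NarrowSize:
  // limb d of the product accumulates the low halves mul(Src1[d - i],
  // Src2[i]), the high halves umulh(...) carried over from column d - 1,
  // and the saved carry sum CarrySumPrevDstIdx. For example, with two
  // limbs (a1:a0) * (b1:b0):
  //   Dst[0] = mul(a0, b0)
  //   Dst[1] = umulh(a0, b0) + mul(a1, b0) + mul(a0, b1)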
  unsigned Opcode = MI.getOpcode();
  unsigned OpO, OpE, OpF;
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_ADD:
    OpO = TargetOpcode::G_UADDO;
    OpE = TargetOpcode::G_UADDE;
    OpF = TargetOpcode::G_UADDE;
    if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
      OpF = TargetOpcode::G_SADDE;
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SUB:
    OpO = TargetOpcode::G_USUBO;
    OpE = TargetOpcode::G_USUBE;
    OpF = TargetOpcode::G_USUBE;
    if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
      OpF = TargetOpcode::G_SSUBE;
  unsigned NumDefs = MI.getNumExplicitDefs();
  Register Src1 = MI.getOperand(NumDefs).getReg();
  Register Src2 = MI.getOperand(NumDefs + 1).getReg();
    CarryDst = MI.getOperand(1).getReg();
  if (MI.getNumOperands() == NumDefs + 3)
    CarryIn = MI.getOperand(NumDefs + 2).getReg();
  LLT LeftoverTy, DummyTy;
  extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
  int NarrowParts = Src1Regs.size();
  Src1Regs.append(Src1Left);
  Src2Regs.append(Src2Left);
  for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
    if (i == e - 1 && CarryDst)
      CarryOut = CarryDst;
                 {Src1Regs[i], Src2Regs[i]});
    } else if (i == e - 1) {
                 {Src1Regs[i], Src2Regs[i], CarryIn});
                 {Src1Regs[i], Src2Regs[i], CarryIn});
  insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
              ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
              ArrayRef(DstRegs).drop_front(NarrowParts));
  MI.eraseFromParent();
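  // Carry chaining for the narrowed add/sub: the lowest limb uses OpO
  // (G_UADDO/G_USUBO), middle limbs use OpE (G_UADDE/G_USUBE) to consume
  // the previous limb's carry-out, and the top limb uses OpF so that the
  // signed variants (G_SADDO/G_SADDE/G_SSUBO/G_SSUBE) report signed
  // overflow from the most significant limb.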
  auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
  if (Size % NarrowSize != 0)
  unsigned NumParts = Size / NarrowSize;
  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
  unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
  MI.eraseFromParent();
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
      IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
  if (SizeOp1 % NarrowSize != 0)
  int NumParts = SizeOp1 / NarrowSize;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
  uint64_t OpStart = MI.getOperand(2).getImm();
  for (int i = 0; i < NumParts; ++i) {
    unsigned SrcStart = i * NarrowSize;
    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
    int64_t ExtractOffset;
    if (OpStart < SrcStart) {
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
  else if (DstRegs.size() > 1)
  MI.eraseFromParent();
  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
  SrcRegs.append(LeftoverRegs);
  uint64_t OpStart = MI.getOperand(3).getImm();
  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
    unsigned DstStart = I * NarrowSize;
    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
    int64_t ExtractOffset, InsertOffset;
    if (OpStart < DstStart) {
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
      InsertOffset = OpStart - DstStart;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    if (ExtractOffset != 0 || SegSize != OpSize) {
  MI.eraseFromParent();
  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
                    Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
        {Src0Regs[I], Src1Regs[I]});
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
        {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
    DstLeftoverRegs.push_back(Inst.getReg(0));
  insertParts(DstReg, DstTy, NarrowTy, DstRegs, LeftoverTy, DstLeftoverRegs);
  MI.eraseFromParent();
  auto [DstReg, SrcReg] = MI.getFirst2Regs();
  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
  MI.eraseFromParent();
  Register CondReg = MI.getOperand(1).getReg();
  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
                    Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
                    Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
        CondReg, Src1Regs[I], Src2Regs[I]);
  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
        LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
  insertParts(DstReg, DstTy, NarrowTy, DstRegs, LeftoverTy, DstLeftoverRegs);
  MI.eraseFromParent();
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    auto C_0 = B.buildConstant(NarrowTy, 0);
        UnmergeSrc.getReg(1), C_0);
    auto LoCTLZ = IsUndef ? B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0))
                          : B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
    MI.eraseFromParent();
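  // The select above implements the split-ctlz identity
  //   ctlz(Hi:Lo) = (Hi == 0) ? NarrowSize + ctlz(Lo) : ctlz(Hi)
  // e.g. for a 64-bit value split into 32-bit halves,
  // ctlz(0x0000000000001000) = 32 + ctlz(0x00001000) = 32 + 19 = 51.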
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
    auto C_0 = B.buildConstant(NarrowTy, 0);
        UnmergeSrc.getReg(0), C_0);
    auto HiCTTZ = IsUndef ? B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1))
                          : B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
    MI.eraseFromParent();
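  // Mirror image of the ctlz case:
  //   cttz(Hi:Lo) = (Lo == 0) ? NarrowSize + cttz(Hi) : cttz(Lo)
  // e.g. cttz(0x0000000100000000) = 32 + cttz(0x00000001) = 32.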
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
    auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
    auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
    auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
    auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
    auto Trunc = B.buildTrunc(NarrowTy, Clamp);
    MI.getOperand(2).setReg(Trunc.getReg(0));
  unsigned Opc = MI.getOpcode();
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
  case TargetOpcode::G_CTLZ: {
    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      MI.eraseFromParent();
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      Op = MIBOp.getReg(0);
    MI.eraseFromParent();
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
  case TargetOpcode::G_CTTZ: {
    auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      MI.eraseFromParent();
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      MI.eraseFromParent();
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
  case TargetOpcode::G_CTPOP: {
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
    assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    auto IsMulSupported = [this](const LLT Ty) {
      auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
    if (IsMulSupported(Ty)) {
      auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
      auto ResTmp = B8Count;
      for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
        auto ShiftC = B.buildConstant(Ty, Shift);
        auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
        ResTmp = B.buildAdd(Ty, ResTmp, Shl);
      B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
    MI.eraseFromParent();
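  // This is the classic branch-free SWAR population count, shown here for
  // a 32-bit value:
  //   v = v - ((v >> 1) & 0x55555555);                 // 2-bit sums
  //   v = (v & 0x33333333) + ((v >> 2) & 0x33333333);  // 4-bit sums
  //   v = (v + (v >> 4)) & 0x0F0F0F0F;                 // per-byte counts
  //   return (v * 0x01010101) >> 24;                   // sum the bytes
  // When G_MUL is not legal, the shift-and-add loop above sums the byte
  // counts into the top byte the same way before the final right shift.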
  const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
  MI.eraseFromParent();
  auto [Dst, X, Y, Z] = MI.getFirst4Regs();
  const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  MI.eraseFromParent();
  bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
  unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
    return lowerFunnelShiftAsShifts(MI);
  if (Result == UnableToLegalize)
    return lowerFunnelShiftAsShifts(MI);
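  // Reminder of the semantics being lowered: fshl(X, Y, Z) shifts the
  // concatenation X:Y left by Z modulo the bitwidth and returns the top
  // half; fshr returns the bottom half of X:Y shifted right. For i8,
  // fshl(0x12, 0x34, 4) = 0x23 (the top 8 bits of 0x2340).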
  auto [Dst, Src] = MI.getFirst2Regs();
  if (SrcTyScalarSize * 2 < DstTyScalarSize) {
        {UnmergeSrc.getReg(0)});
        {UnmergeSrc.getReg(1)});
  MI.eraseFromParent();
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
  unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
  bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
  unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
    return lowerRotateWithReverseRotate(MI);
  unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
  bool IsFShLegal = false;
  if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
      LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
    MI.eraseFromParent();
    return buildFunnelShift(FShOpc, Dst, Src, Amt);
    return buildFunnelShift(RevFsh, Dst, Src, Amt);
  unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
  unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
  auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
    auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
    auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
    auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
    auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
    auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
    auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
    auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
  MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
  MI.eraseFromParent();
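  // For power-of-two bitwidths the masked path above computes the standard
  // branch-free rotate, e.g. for rotl:
  //   (Src << (Amt & (BW - 1))) | (Src >> (-Amt & (BW - 1)))
  // The URem path handles non-power-of-two widths, where the mask trick
  // does not reduce the amount modulo BW.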
  auto [Dst, Src] = MI.getFirst2Regs();
  MI.eraseFromParent();
  auto [Dst, Src] = MI.getFirst2Regs();
  MI.eraseFromParent();
  auto [Dst, Src] = MI.getFirst2Regs();
  auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  if (SrcTy != S64 && SrcTy != S32)
  if (DstTy != S32 && DstTy != S64)
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
  unsigned SatWidth = DstTy.getScalarSizeInBits();
  APInt MinInt, MaxInt;
  if (AreExactFloatBounds) {
    MI.eraseFromParent();
        DstTy.changeElementSize(1), Src, Src);
    MI.eraseFromParent();
  MI.eraseFromParent();
      DstTy.changeElementSize(1), Src, Src);
  MI.eraseFromParent();
  auto [Dst, Src] = MI.getFirst2Regs();
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  MI.eraseFromParent();
  auto [DstTy, SrcTy] = MI.getFirst2LLTs();
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  MI.eraseFromParent();
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  MI.eraseFromParent();
  unsigned BoolExtOp =
  MI.eraseFromParent();
  auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();
  if (Src0Ty == Src1Ty) {
  } else if (Src0Size > Src1Size) {
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM
                       ? TargetOpcode::G_FMINNUM_IEEE
                       : TargetOpcode::G_FMAXNUM_IEEE;
  auto [Dst, Src0, Src1] = MI.getFirst3Regs();
  MI.eraseFromParent();
  unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  auto [DstReg, X] = MI.getFirst2Regs();
  const unsigned Flags = MI.getFlags();
  MI.eraseFromParent();
  auto [DstReg, SrcReg] = MI.getFirst2Regs();
  unsigned Flags = MI.getFlags();
      SrcReg, Zero, Flags);
      SrcReg, Trunc, Flags);
  MI.eraseFromParent();
  const unsigned NumOps = MI.getNumOperands();
  auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
  unsigned PartSize = Src0Ty.getSizeInBits();
  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;
    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
    ResultReg = NextResult;
  if (DstTy.isPointer()) {
          DstTy.getAddressSpace())) {
  MI.eraseFromParent();
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
  MI.eraseFromParent();
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();
    SrcRegs[IdxVal] = MI.getOperand(2).getReg();
    MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
  int64_t Offset = IdxVal * EltBytes;
  MI.eraseFromParent();
  auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
      MI.getFirst3RegLLTs();
  for (int Idx : Mask) {
    if (!Undef.isValid())
    if (Src0Ty.isScalar()) {
    int NumElts = Src0Ty.getNumElements();
    Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
    int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
  if (DstTy.isScalar())
  MI.eraseFromParent();
  auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
      MI.getFirst4RegLLTs();
  if (VecTy.isScalableVector())
  std::optional<APInt> PassthruSplatVal =
  if (PassthruSplatVal.has_value()) {
  } else if (HasPassthru) {
  unsigned NumElmts = VecTy.getNumElements();
  for (unsigned I = 0; I < NumElmts; ++I) {
    if (HasPassthru && I == NumElmts - 1) {
                       {OutPos, EndOfVector});
  MI.eraseFromParent();
  if (Alignment > Align(1)) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  Register AllocSize = MI.getOperand(1).getReg();
  MI.eraseFromParent();
  MI.eraseFromParent();
  MI.eraseFromParent();
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  unsigned Offset = MI.getOperand(2).getImm();
  unsigned DstSize = DstTy.getSizeInBits();
  if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
    for (unsigned Idx = Offset / SrcEltSize;
    if (SubVectorElts.size() == 1)
    MI.eraseFromParent();
  if (DstTy.isScalar() &&
    LLT SrcIntTy = SrcTy;
    MI.eraseFromParent();
  auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
  if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
    for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
      DstElts.push_back(UnmergeInsertSrc.getReg(i));
    MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
  LLT IntDstTy = DstTy;
  MI.eraseFromParent();
  auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
      MI.getFirst4RegLLTs();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
  LLT BoolTy = Dst1Ty;
  auto ResultLowerThanLHS =
  MI.eraseFromParent();
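  // Signed overflow check used above: for G_SADDO the result overflows iff
  // (Res <s LHS) differs from (RHS <s 0), so the two conditions are xor'd;
  // the G_SSUBO form flips the comparison on RHS. E.g. INT_MAX + 1 wraps
  // negative, giving Res <s LHS while RHS >= 0, which reports overflow.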
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  switch (MI.getOpcode()) {
  case TargetOpcode::G_UADDSAT:
    BaseOp = TargetOpcode::G_ADD;
  case TargetOpcode::G_SADDSAT:
    BaseOp = TargetOpcode::G_ADD;
  case TargetOpcode::G_USUBSAT:
    BaseOp = TargetOpcode::G_SUB;
  case TargetOpcode::G_SSUBSAT:
    BaseOp = TargetOpcode::G_SUB;
  MI.eraseFromParent();
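  // The min/max forms rely on these saturation identities (unsigned case):
  //   uaddsat(A, B) = A + umin(B, ~A)   // ~A is the headroom above A
  //   usubsat(A, B) = umax(A, B) - B    // clamps to 0 when A < B
  // Signed variants clamp against SMIN/SMAX instead.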
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  case TargetOpcode::G_UADDSAT:
    OverflowOp = TargetOpcode::G_UADDO;
  case TargetOpcode::G_SADDSAT:
    OverflowOp = TargetOpcode::G_SADDO;
  case TargetOpcode::G_USUBSAT:
    OverflowOp = TargetOpcode::G_USUBO;
  case TargetOpcode::G_SSUBSAT:
    OverflowOp = TargetOpcode::G_SSUBO;
  Register Tmp = OverflowRes.getReg(0);
  Register Ov = OverflowRes.getReg(1);
  MI.eraseFromParent();
  assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
          MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
         "Expected shlsat opcode!");
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
  auto [Res, LHS, RHS] = MI.getFirst3Regs();
  MI.eraseFromParent();
  auto [Dst, Src] = MI.getFirst2Regs();
  unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
  for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
    APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
  Res.getInstr()->getOperand(0).setReg(Dst);
  MI.eraseFromParent();
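  // Byte swap as shifts and masks, e.g. for i32:
  //   bswap(x) = (x << 24) | ((x << 8) & 0x00FF0000) |
  //              ((x >> 8) & 0x0000FF00) | (x >> 24)
  // The loop produces the masked inner pair for each byte i and ORs it
  // into the accumulated result.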
  const LLT Ty = Dst.getLLTTy(*B.getMRI());
  auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
  auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
  return B.buildOr(Dst, LHS, RHS);
  auto [Dst, Src] = MI.getFirst2Regs();
  for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
  MI.eraseFromParent();
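  // SwapN exchanges the low and high N-bit halves of each 2N-bit group:
  //   ((Src & MaskLoNTo0) >> N) | ((Src << N) & MaskLoNTo0)
  // Composing stages for N = 4, 2, 1 together with a byte swap reverses
  // all bits, e.g. for i8: abcdefgh -> efghabcd -> ghefcdab -> hgfedcba.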
  bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
  int NameOpIdx = IsRead ? 1 : 0;
  int ValRegIndex = IsRead ? 0 : 1;
  Register ValReg = MI.getOperand(ValRegIndex).getReg();
  const MDString *RegStr = cast<MDString>(
      cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
  MI.eraseFromParent();
  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
  MI.eraseFromParent();
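  // Multiply-high via widening: extend both operands to twice the width
  // (sext for G_SMULH, zext for G_UMULH), multiply, shift the product
  // right by the original bitwidth, and truncate. E.g. umulh on i32:
  //   trunc(i64 (zext(a) * zext(b)) >> 32)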
  auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
  MI.eraseFromParent();
  MI.eraseFromParent();
  APInt ExpMask = Inf;
  LLT DstTyCopy = DstTy;
  Mask &= ~fcPosFinite;
  Mask &= ~fcNegFinite;
  Mask &= ~PartialCheck;
  else if (PartialCheck == fcZero)
  appendToRes(SubnormalRes);
  else if (PartialCheck == fcInf)
  if (PartialCheck == fcNan) {
  } else if (PartialCheck == fcQNan) {
                               Abs, InfWithQnanBitC);
  APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
  APInt MaxExpMinusOne = ExpMask - ExpLSB;
  appendToRes(NormalRes);
  MI.eraseFromParent();
  auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
      MI.getFirst4RegLLTs();
  bool IsEltPtr = DstTy.isPointerOrPointerVector();
  if (MaskTy.isScalar()) {
    if (DstTy.isVector()) {
      MaskReg = ShufSplat.getReg(0);
    } else if (!DstTy.isVector()) {
  if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
  MI.eraseFromParent();
  unsigned Opcode = MI.getOpcode();
  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
                                        : TargetOpcode::G_UDIV,
      {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MIRBuilder.buildInstr(
      Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
                                        : TargetOpcode::G_UREM,
      {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
  MI.eraseFromParent();
  MI.eraseFromParent();
  MI.eraseFromParent();
  Register DestReg = MI.getOperand(0).getReg();
  MI.eraseFromParent();
  MI.eraseFromParent();
  Register ListPtr = MI.getOperand(1).getReg();
  const Align A(MI.getOperand(2).getImm());
  VAList = AndDst.getReg(0);
  Align EltAlignment = DL.getABITypeAlign(Ty);
  MI.eraseFromParent();
                                           unsigned Limit, const MemOp &Op,
                                           unsigned DstAS, unsigned SrcAS,
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
  if (Op.isFixedDstAlign())
  unsigned NumMemOps = 0;
  while (TySize > Size) {
    assert(NewTySize > 0 && "Could not find appropriate type");
    if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
            VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
    if (++NumMemOps > Limit)
    MemOps.push_back(Ty);
  if (!Ty.isVector() && ValVRegAndVal) {
    APInt Scalar = ValVRegAndVal->Value.trunc(8);
  if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  assert(KnownLen != 0 && "Have a zero length memset length!");
  bool DstAlignCanChange = false;
    DstAlignCanChange = true;
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
          MF.getFunction().getAttributes(), TLI))
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  LLT LargestTy = MemOps[0];
  for (unsigned i = 1; i < MemOps.size(); i++)
      LargestTy = MemOps[i];
  unsigned DstOff = 0;
  unsigned Size = KnownLen;
  for (unsigned I = 0; I < MemOps.size(); I++) {
    if (TySize > Size) {
      assert(I == MemOps.size() - 1 && I != 0);
      DstOff -= TySize - Size;
      Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
    auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
      Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
    MIB.buildStore(Value, Ptr, *StoreMMO);
  MI.eraseFromParent();
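  // Example expansion (actual sizes depend on what the target makes
  // legal): memset(p, 0xAB, 16) with s64 legal becomes two s64 stores of
  // the splatted pattern 0xABABABABABABABAB at offsets 0 and 8. An odd
  // tail stores a truncated copy of the splat, backing DstOff up so the
  // final store overlaps the previous one instead of splitting further.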
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  auto [Dst, Src, Len] = MI.getFirst3Regs();
  const auto *MMOIt = MI.memoperands_begin();
  bool IsVolatile = MemOp->isVolatile();
         "inline memcpy with dynamic size is not yet supported");
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
  Align DstAlign = DstMMO.getBaseAlign();
  Align SrcAlign = SrcMMO.getBaseAlign();
  return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                                    Align SrcAlign, bool IsVolatile) {
  assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
  return lowerMemcpy(MI, Dst, Src, KnownLen,
                     std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
                                    Align SrcAlign, bool IsVolatile) {
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  assert(KnownLen != 0 && "Have a zero length memcpy length!");
  bool DstAlignCanChange = false;
  Align Alignment = std::min(DstAlign, SrcAlign);
    DstAlignCanChange = true;
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
          MF.getFunction().getAttributes(), TLI))
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (!TRI->hasStackRealignment(MF))
      NewAlign = std::min(NewAlign, *StackAlign);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
  unsigned CurrOffset = 0;
  unsigned Size = KnownLen;
  for (auto CopyTy : MemOps) {
    if (CopyTy.getSizeInBytes() > Size)
      CurrOffset -= CopyTy.getSizeInBytes() - Size;
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
    if (CurrOffset != 0) {
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
    if (CurrOffset != 0) {
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    MIB.buildStore(LdVal, StorePtr, *StoreMMO);
    CurrOffset += CopyTy.getSizeInBytes();
    Size -= CopyTy.getSizeInBytes();
  MI.eraseFromParent();
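  // memcpy interleaves each load with its store, which is safe because the
  // two ranges are guaranteed disjoint; e.g. a 16-byte copy with s64 legal
  // is load/store at offset 0 followed by load/store at offset 8. A final
  // short chunk is backed up (CurrOffset decreases) to overlap the
  // previous access rather than be split into smaller ones.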
  auto &MF = *MI.getParent()->getParent();
  const auto &TLI = *MF.getSubtarget().getTargetLowering();
  auto &DL = MF.getDataLayout();
  assert(KnownLen != 0 && "Have a zero length memmove length!");
  bool DstAlignCanChange = false;
  Align Alignment = std::min(DstAlign, SrcAlign);
    DstAlignCanChange = true;
  std::vector<LLT> MemOps;
  const auto &DstMMO = **MI.memoperands_begin();
  const auto &SrcMMO = **std::next(MI.memoperands_begin());
          MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
          MF.getFunction().getAttributes(), TLI))
  if (DstAlignCanChange) {
    Align NewAlign = DL.getABITypeAlign(IRTy);
    if (!TRI->hasStackRealignment(MF))
      NewAlign = std::min(NewAlign, *StackAlign);
    if (NewAlign > Alignment) {
      Alignment = NewAlign;
  LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
  unsigned CurrOffset = 0;
  for (auto CopyTy : MemOps) {
        MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
    if (CurrOffset != 0) {
      LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
    LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
    CurrOffset += CopyTy.getSizeInBytes();
  for (unsigned I = 0; I < MemOps.size(); ++I) {
    LLT CopyTy = MemOps[I];
        MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
    if (CurrOffset != 0) {
      StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
    MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
  MI.eraseFromParent();
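  // Unlike memcpy above, memmove must tolerate overlapping ranges, so the
  // first loop issues every load before the second loop issues any store;
  // the loaded values are carried in LoadVals across the two passes.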
  const unsigned Opc = MI.getOpcode();
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) &&
         "Expected memcpy like instruction");
  auto MMOIt = MI.memoperands_begin();
  auto [Dst, Src, Len] = MI.getFirst3Regs();
  if (Opc != TargetOpcode::G_MEMSET) {
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    SrcAlign = MemOp->getBaseAlign();
  uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
  if (KnownLen == 0) {
    MI.eraseFromParent();
  bool IsVolatile = MemOp->isVolatile();
  if (Opc == TargetOpcode::G_MEMCPY_INLINE)
    return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
  if (MaxLen && KnownLen > MaxLen)
  if (Opc == TargetOpcode::G_MEMCPY) {
    auto &MF = *MI.getParent()->getParent();
    const auto &TLI = *MF.getSubtarget().getTargetLowering();
    return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
  if (Opc == TargetOpcode::G_MEMMOVE)
    return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
unsigned const MachineRegisterInfo * MRI
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static constexpr Register SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
APInt bitcastToAPInt() const
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
APInt zext(unsigned width) const
Zero extend to a new width.
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
void negate()
Negate this APInt in place.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
APInt sext(unsigned width) const
Sign extend to a new width.
APInt shl(unsigned shiftAmt) const
Left-shift function.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
bool hasAttributes() const
Return true if the builder has IR-level attributes.
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
const APFloat & getValueAPF() const
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Type * getReturnType() const
Returns the type of the ret val.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Register getSubVec() const
Register getBigVec() const
uint64_t getIndexImm() const
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a threeway compare.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
This is an important class for using LLVM in a threaded context.
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult lowerFAbs(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
bool isLegal(const LegalityQuery &Query) const
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
StringRef getName(unsigned Opcode) const
Returns the name for the instruction with the given opcode.
StringRef getString() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value).
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
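A minimal usage sketch for the build* helpers above, assuming the builder's insertion point is already set; the function name emitAddMul and the s32 type choice are illustrative only, not taken from this file:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

using namespace llvm;

// Emit (X + 1) * X at B's current insertion point. Assumes X is an s32
// generic virtual register.
static Register emitAddMul(MachineIRBuilder &B, Register X) {
  const LLT S32 = LLT::scalar(32);
  auto One = B.buildConstant(S32, 1);  // %one:_(s32) = G_CONSTANT i32 1
  auto Sum = B.buildAdd(S32, X, One);  // %sum:_(s32) = G_ADD %x, %one
  return B.buildMul(S32, Sum, X).getReg(0); // %res:_(s32) = G_MUL %sum, %x
}

Passing an LLT as the DstOp creates a fresh virtual register of that type, which is the common idiom inside the legalizer.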
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not: NegOne = G_CONSTANT -1; Res = G_XOR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_AND Op, LowBitsSet(ImmOp). Since there is no G_ZEXT_INREG like G_SEXT_INREG, the in-register zero-extension is emitted as an AND with a low-bits mask.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res = G_EXTRACT Src, Index.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildInsertSubvector(const DstOp &Res, const SrcOp &Src0, const SrcOp &Src1, unsigned Index)
Build and insert Res = G_INSERT_SUBVECTOR Src0, Src1, Idx.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildFPTOUI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOUI Src0.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idiom.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ..., Res = G_BUILD_VECTOR Op0, ..., or Res = G_CONCAT_VECTORS Op0, ..., depending on the destination and source types.
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
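A hedged sketch combining buildFrameIndex, getMachineMemOperand, buildStore, and buildLoad; the 64-bit address-space-0 pointer type and the helper name spillAndReload are assumptions for illustration:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

// Spill Val (assumed s32) to frame index FI and reload it.
static Register spillAndReload(MachineIRBuilder &B, Register Val, int FI) {
  MachineFunction &MF = B.getMF();
  const LLT S32 = LLT::scalar(32);
  const LLT P0 = LLT::pointer(0, 64); // Illustrative pointer type.
  auto Addr = B.buildFrameIndex(P0, FI);
  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      S32, Align(4));
  B.buildStore(Val, Addr, *StoreMMO);
  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      S32, Align(4));
  return B.buildLoad(S32, Addr, *LoadMMO).getReg(0);
}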
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a, b, ..., x, undef, ..., undef (i.e. pad the vector out with undef elements).
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0; Res = G_BUILD_VECTOR a, b, ..., x (i.e. drop the trailing elements).
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &CmpVal, const SrcOp &NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
MachineInstrBuilder buildExtractSubvector(const DstOp &Res, const SrcOp &Src, unsigned Index)
Build and insert Res = G_EXTRACT_SUBVECTOR Src, Idx0.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildSplatVector(const DstOp &Res, const SrcOp &Val)
Build and insert Res = G_SPLAT_VECTOR Val.
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is found.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given register.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
constexpr bool isValid() const
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR.
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save and restore.
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean types?
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in types wider than i1.
@ UndefinedBooleanContent
@ ZeroOrNegativeOneBooleanContent
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and alignment.
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the target instruction selector can accept natively.
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is legal.
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes added by doing so.
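For context, these actions are what getAction resolves a LegalityQuery to; a target declares them per opcode in its LegalizerInfo constructor. A hypothetical rule set, with opcodes and types chosen only to illustrate the action kinds (the rule-set methods are the real LegalizeRuleSet API):

// Inside a hypothetical MyTargetLegalizerInfo constructor:
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT V4S32 = LLT::fixed_vector(4, 32);

getActionDefinitionsBuilder(TargetOpcode::G_ADD)
    .legalFor({S32, S64, V4S32})     // Legal: leave as-is for these types.
    .clampScalar(0, S32, S64)        // WidenScalar below s32, NarrowScalar above s64.
    .clampMaxNumElements(0, S32, 4); // FewerElements for wider s32 vectors.

getActionDefinitionsBuilder(TargetOpcode::G_FREM)
    .libcallFor({S32, S64});         // Libcall: expand via fmodf/fmod.

getActionDefinitionsBuilder(TargetOpcode::G_BSWAP)
    .customFor({S32});               // Custom: routed to legalizeCustom().

getLegacyLegalizerInfo().computeTables(); // Required once all rules are declared.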
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
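A small sketch of the pattern-match API above; the helper isConstantOne is hypothetical:

#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;
using namespace MIPatternMatch;

// Return true if Reg is, looking through its def chain, the constant 1.
static bool isConstantOne(Register Reg, const MachineRegisterInfo &MRI) {
  APInt Cst;
  return mi_match(Reg, MRI, m_ICst(Cst)) && Cst.isOne();
}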
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
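A sketch of resolving one of these getters into a concrete libcall; the include path varies between LLVM versions, so treat it as an assumption:

#include "llvm/CodeGen/RuntimeLibcalls.h" // llvm/IR/RuntimeLibcalls.h in newer trees.
#include <cassert>

using namespace llvm;

// Pick the soft-float libcall for a signed f32 -> i32 conversion.
// UNKNOWN_LIBCALL means no runtime routine exists for this pair.
static RTLIB::Libcall pickFPToSInt32() {
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f32, MVT::i32);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "unsupported conversion");
  return LC;
}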
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_BUILD_VECTOR.
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elements or scalar bitwidth.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
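A sketch of using createLibcall directly to emit a call to fmodf; the wrapper emitFModF and the choice of float argument types are assumptions, and error handling is elided:

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Lower a 32-bit FP remainder to a call to fmodf. Dst, Src0, and Src1
// are assumed to be s32 vregs holding float values.
static LegalizerHelper::LegalizeResult
emitFModF(MachineIRBuilder &B, Register Dst, Register Src0, Register Src1,
          LostDebugLocObserver &LocObserver) {
  LLVMContext &Ctx = B.getMF().getFunction().getContext();
  Type *FloatTy = Type::getFloatTy(Ctx);
  return createLibcall(B, "fmodf",
                       CallLowering::ArgInfo({Dst}, FloatTy, 0),
                       {{Src0, FloatTy, 0}, {Src1, FloatTy, 0}},
                       CallingConv::C, LocObserver);
}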
EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which implies the number of blocks needed).
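A sketch of extractParts splitting an s64 into two s32 pieces and re-merging them with buildMergeLikeInstr; the helper name splitAndRejoin is hypothetical:

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"

using namespace llvm;

// Split Src64 (assumed s64) into two s32 halves, then merge them back.
static Register splitAndRejoin(Register Src64, MachineIRBuilder &B,
                               MachineRegisterInfo &MRI) {
  SmallVector<Register, 2> Parts;
  extractParts(Src64, LLT::scalar(32), 2, Parts, B, MRI);
  return B.buildMergeLikeInstr(LLT::scalar(64), Parts).getReg(0);
}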
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
OutputIt copy(R &&Range, OutputIt Out)
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its APInt value and def register.
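A sketch of a constant fold built on this lookup; tryFoldAdd is a hypothetical helper and assumes AddMI is a G_ADD whose operands share a type:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"

using namespace llvm;

// Fold a G_ADD whose operands are both constants (possibly behind copies).
static bool tryFoldAdd(MachineInstr &AddMI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &B) {
  auto LHS = getIConstantVRegValWithLookThrough(AddMI.getOperand(1).getReg(), MRI);
  auto RHS = getIConstantVRegValWithLookThrough(AddMI.getOperand(2).getReg(), MRI);
  if (!LHS || !RHS)
    return false;
  B.setInstrAndDebugLoc(AddMI);
  B.buildConstant(AddMI.getOperand(0).getReg(), LHS->Value + RHS->Value);
  AddMI.eraseFromParent();
  return true;
}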
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
unsigned Log2(Align A)
Returns the log2 of the alignment.
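A few worked values for the alignment helpers above, with the expected results (under the documented semantics) in comments:

#include "llvm/Support/Alignment.h"

using namespace llvm;

static void alignmentExamples() {
  Align A(16);
  Align AtOffset = commonAlignment(A, /*Offset=*/8); // Align(8)
  unsigned Shift = Log2(AtOffset);                   // 3
  uint64_t Rounded = alignTo(13, Align(8));          // 16
  Align AtLeastOne = assumeAligned(0);               // Align(1)
  (void)Shift; (void)Rounded; (void)AtLeastOne;
}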
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
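A worked example of the GCD/LCM type helpers; the expected results follow from the documented size arithmetic and are worth double-checking against your LLVM version:

#include "llvm/CodeGen/GlobalISel/Utils.h"

using namespace llvm;

// getGCDType(v4s32, v3s32) == s32    (gcd(4, 3) == 1 element of s32)
// getLCMType(v3s32, v4s32) == v12s32 (lcm of 96 and 128 bits == 384)
static void typeHelperExamples() {
  LLT V3S32 = LLT::fixed_vector(3, 32);
  LLT V4S32 = LLT::fixed_vector(4, 32);
  LLT GCD = getGCDType(V4S32, V3S32);
  LLT LCM = getLCMType(V3S32, V4S32);
  (void)GCD; (void)LCM;
}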
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static constexpr roundingMode rmTowardZero
static const fltSemantics & IEEEdouble() LLVM_READNONE
opStatus
IEEE-754R 7: Default exception handling.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
SmallVector< ISD::ArgFlagsTy, 4 > Flags
The LegalityQuery object bundles together all the information that's needed to decide whether a given operation is legal or not.
LegalizeAction Action
The action to take or the final answer.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)