#include "llvm/IR/IntrinsicsRISCV.h"

#define DEBUG_TYPE "riscv-lower"
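// Tuning knobs for RISC-V lowering: the maximum size of the instruction web
// considered for vector-widening (VW) expansion, whether VW_W operations may
// be formed with splat constants, the repeat threshold for replacing FP
// division with multiplication by the reciprocal, and the instruction budget
// for materializing floating-point immediates.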
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),

    cl::desc("Allow the formation of VW_W operations (e.g., "
             "VWADD_W) with splat constants"),

    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),

    cl::desc("Give the maximum number of instructions that we will "
             "use for creating a floating-point immediate value"),
  if (Subtarget.isRVE())

      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
               "doesn't support the F instruction set extension (ignoring "

      !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
               "doesn't support the D instruction set extension (ignoring "
  if (Subtarget.hasStdExtZfbfmin())
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtD())
  if (Subtarget.hasStdExtZfinx())
  if (Subtarget.hasStdExtZdinx()) {
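  // Scalable RVV vector types grouped by element type; these feed the
  // register-class registration loops below.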
  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};

  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};

  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};

  static const MVT::SimpleValueType BF16VecVTs[] = {
      MVT::nxv1bf16, MVT::nxv2bf16,  MVT::nxv4bf16,
      MVT::nxv8bf16, MVT::nxv16bf16, MVT::nxv32bf16};

  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};

  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
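  // Map a scalable vector type to a vector register class based on its
  // register group size (LMUL): one vector register (VR) or a group of 2, 4,
  // or 8 registers (VRM2/VRM4/VRM8).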
  auto addRegClassForRVV = [this](MVT VT) {
    if (VT.getVectorMinNumElements() < MinElts)
      return;

    unsigned Size = VT.getSizeInBits().getKnownMinValue();
    const TargetRegisterClass *RC;
    if (Size <= RISCV::RVVBitsPerBlock)
      RC = &RISCV::VRRegClass;
    else if (Size == 2 * RISCV::RVVBitsPerBlock)
      RC = &RISCV::VRM2RegClass;
    else if (Size == 4 * RISCV::RVVBitsPerBlock)
      RC = &RISCV::VRM4RegClass;
    else if (Size == 8 * RISCV::RVVBitsPerBlock)
      RC = &RISCV::VRM8RegClass;
  for (MVT VT : BoolVecVTs)
    addRegClassForRVV(VT);

  for (MVT VT : IntVecVTs) {
    if (VT.getVectorElementType() == MVT::i64 &&
    addRegClassForRVV(VT);

  for (MVT VT : F16VecVTs)
    addRegClassForRVV(VT);

  for (MVT VT : BF16VecVTs)
    addRegClassForRVV(VT);

  for (MVT VT : F32VecVTs)
    addRegClassForRVV(VT);

  for (MVT VT : F64VecVTs)
    addRegClassForRVV(VT);
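  // Fixed-length vectors also live in RVV register classes when
  // useRVVForFixedLengthVectorVT allows it for the subtarget.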
  auto addRegClassForFixedVectors = [this](MVT VT) {

  if (useRVVForFixedLengthVectorVT(VT))
    addRegClassForFixedVectors(VT);

  if (useRVVForFixedLengthVectorVT(VT))
    addRegClassForFixedVectors(VT);
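  // Scalar integer operations: which are Legal, Custom, or Expand depends on
  // the multiply (M/Zmmul), bit-manipulation (Zbb/Zbkb/Zbs), and XTHead
  // vendor extensions present.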
  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())

      {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},

  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())

  if (!Subtarget.hasStdExtM())

                     {MVT::i8, MVT::i16, MVT::i32}, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
  } else if (Subtarget.hasVendorXTHeadBb()) {

      (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
       Subtarget.hasVendorXTHeadBb())

  if (Subtarget.hasStdExtZbb()) {

  if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb()) {

  if (!Subtarget.hasVendorXTHeadCondMov())
  static const unsigned FPLegalNodeTypes[] = {

  static const unsigned FPOpToExpand[] = {

  static const unsigned FPRndMode[] = {

  static const unsigned ZfhminZfbfminPromoteOps[] = {
  if (Subtarget.hasStdExtZfbfmin()) {

  if (!Subtarget.hasStdExtZfa())

  if (Subtarget.hasStdExtZfa())

  if (Subtarget.hasStdExtZfa()) {

  if (Subtarget.hasStdExtZicbop()) {

  if (Subtarget.hasStdExtA()) {
  } else if (Subtarget.hasForcedAtomics()) {

                     {MVT::i8, MVT::i16}, Custom);
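  // Vector-predicated (VP) opcodes handled on RVV, split into the integer and
  // floating-point groups used when configuring the vector types below.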
  static const unsigned IntegerVPOps[] = {
      ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
      ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
      ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
      ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
      ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
      ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
      ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
      ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FP_TO_SINT,
      ISD::VP_FP_TO_UINT, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
      ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE, ISD::VP_SMIN,
      ISD::VP_SMAX, ISD::VP_UMIN, ISD::VP_UMAX,

  static const unsigned FloatingPointVPOps[] = {
      ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
      ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
      ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
      ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
      ISD::VP_SELECT, ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP,
      ISD::VP_SETCC, ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND,
      ISD::VP_SQRT, ISD::VP_FMINNUM, ISD::VP_FMAXNUM,
      ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
      ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
      ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS};
  static const unsigned IntegerVecReduceOps[] = {

  static const unsigned FloatingPointVecReduceOps[] = {

       ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
       ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
       ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
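  // Per-type action setup for the scalable vector types: i1 mask vectors
  // first, then the integer vector types (including VP loads/stores, strided
  // and indexed memory operations, and the Zvbb count-leading/trailing-zero
  // and popcount ops).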
  for (MVT VT : BoolVecVTs) {

        {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

         ISD::VP_TRUNCATE, ISD::VP_SETCC},

  for (MVT VT : IntVecVTs) {
    if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())

        {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
         ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},

    if (Subtarget.hasStdExtZvbb()) {

                        ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},

                        ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},

                        ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
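  // With only Zvfhmin (and no full Zvfh), the f16 vector operations in these
  // two lists are promoted to f32 vectors rather than selected natively.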
  static const unsigned ZvfhminPromoteOps[] = {

  static const unsigned ZvfhminPromoteVPOps[] = {
      ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
      ISD::VP_FDIV, ISD::VP_FNEG, ISD::VP_FABS,
      ISD::VP_FMA, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
      ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SQRT,
      ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
      ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
      ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
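  // Shared helpers for the FP vector types: SetCommonVFPActions configures a
  // type's operation actions, and SetCommonVFPExtLoadTruncStoreActions marks
  // extending loads and truncating stores between FP element widths.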
  const auto SetCommonVFPActions = [&](MVT VT) {

        {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
         ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
  const auto SetCommonVFPExtLoadTruncStoreActions =

        for (auto SmallVT : SmallerVTs) {

    for (MVT VT : F16VecVTs) {
      SetCommonVFPActions(VT);

    for (MVT VT : F16VecVTs) {

                          ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},

      if (VT == MVT::nxv32f16) {

    for (MVT VT : F32VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);

    for (MVT VT : F64VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
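    // Fixed-length vector types are configured much like the scalable types
    // above; they are later bridged onto equivalently sized scalable
    // container types (see getContainerForFixedLengthVector below).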
      if (!useRVVForFixedLengthVectorVT(VT))

            {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,

             ISD::VP_SETCC, ISD::VP_TRUNCATE},

             ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
             ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,

      if (Subtarget.hasStdExtZvbb()) {

      if (!useRVVForFixedLengthVectorVT(VT))

             ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},

             ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
             ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
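  // Remaining subtarget-dependent setup: atomics (A or forced-atomics),
  // indexed and paired memory operations from the XTHead vendor extensions,
  // and extra tuning keyed off Zbb/Zbs/Zbkb.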
  if (Subtarget.hasStdExtA())

  if (Subtarget.hasForcedAtomics()) {

  if (Subtarget.hasVendorXTHeadMemIdx()) {

  if (Subtarget.hasStdExtZbb())

  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())

  if (Subtarget.hasStdExtZbkb())

  if (Subtarget.hasVendorXTHeadMemPair())
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {

bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
                                                      bool IsScalable) const {

  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
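// getTgtMemIntrinsic: describe the memory behaviour of the RISC-V vector
// load/store intrinsics (memory type, pointer operand, and flags) so that the
// rest of the backend can reason about them.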
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
  auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
                                 bool IsUnitStrided) {

    Info.ptrVal = I.getArgOperand(PtrOp);

      MemTy = I.getArgOperand(0)->getType();

      MemTy = I.getType();

    if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
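  // Classify each intrinsic. For the segment load/store forms the pointer
  // operand is located relative to the end of the argument list, since the
  // masked/strided/fault-only-first variants append extra trailing operands.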
  switch (Intrinsic) {
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);

  case Intrinsic::riscv_masked_strided_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
  case Intrinsic::riscv_masked_strided_store:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    // Operands are (vec, ..., vec, ptr, vl), so the pointer is the
    // second-to-last operand.
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vleff:
  case Intrinsic::riscv_vleff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vse_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vluxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vsuxei_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vlseg2ff:
  case Intrinsic::riscv_vlseg3ff:
  case Intrinsic::riscv_vlseg4ff:
  case Intrinsic::riscv_vlseg5ff:
  case Intrinsic::riscv_vlseg6ff:
  case Intrinsic::riscv_vlseg7ff:
  case Intrinsic::riscv_vlseg8ff:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vlseg2ff_mask:
  case Intrinsic::riscv_vlseg3ff_mask:
  case Intrinsic::riscv_vlseg4ff_mask:
  case Intrinsic::riscv_vlseg5ff_mask:
  case Intrinsic::riscv_vlseg6ff_mask:
  case Intrinsic::riscv_vlseg7ff_mask:
  case Intrinsic::riscv_vlseg8ff_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
    return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
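// Scalar add/compare immediates are legal only when they fit the signed
// 12-bit immediate field of the I-type instruction encodings.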
  return isInt<12>(Imm);

  return isInt<12>(Imm);

  return (SrcBits == 64 && DestBits == 32);

  return (SrcBits == 64 && DestBits == 32);

  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&

  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;

  return Subtarget.hasStdExtZbb();

  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();

  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())

  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
  EVT VT = Y.getValueType();

  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         !isa<ConstantSDNode>(Y);

  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  if (Subtarget.hasVendorXTHeadBs())
    return C != nullptr;
  return C && C->getAPIntValue().ule(10);
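// Even without Zbs/XTheadBs, a single-bit test is cheap via ANDI+SEQZ/SNEZ as
// long as the mask (1 << BitPos) fits ANDI's signed 12-bit immediate, i.e.
// BitPos <= 10 (1 << 10 == 1024 <= 2047).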
  if (BitSize > Subtarget.getXLen())

  int64_t Val = Imm.getSExtValue();

  if (!Subtarget.enableUnalignedScalarMem())

    unsigned OldShiftOpcode, unsigned NewShiftOpcode,

  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())

  if (NewShiftOpcode == ISD::SRL && CC->isOne())
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:

  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    return Operand == 1;
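// Shifts, divisions, and remainders can only take a splatted scalar on the
// right-hand side (operand 1), matching the .vx/.vf instruction forms; the
// commutative arithmetic and compare opcodes above accept it on either side.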
  auto *II = dyn_cast<IntrinsicInst>(I);

  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
    return Operand == 1;
  // These intrinsics are commutative.
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  // These intrinsics have reversed 'vr' forms (e.g. vrsub.vx, vfrsub.vf,
  // vfrdiv.vf), so a splat on either operand can be folded.
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  for (auto OpIdx : enumerate(I->operands())) {

    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());

    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))

    if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))

    for (Use &U : Op->uses()) {
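// shouldSinkOperands: sink splatable operands next to their vector users so
// instruction selection can fold them into .vx/.vf forms; operands already in
// the list and i1 (mask) vectors are skipped.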
  if (!Subtarget.hasStdExtZfa())

  bool IsSupportedVT = false;
  if (VT == MVT::f16) {
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
  } else if (VT == MVT::f32) {
    IsSupportedVT = true;
  } else if (VT == MVT::f64) {
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;
                                       bool ForCodeSize) const {
  bool IsLegalVT = false;

  else if (VT == MVT::f32)

  else if (VT == MVT::f64)

  else if (VT == MVT::bf16)
    IsLegalVT = Subtarget.hasStdExtZfbfmin();

  return Imm.isZero();

  int Cost = Imm.isNegZero()

                                     Subtarget.getFeatureBits());

                                                     unsigned Index) const {
  if ((ResElts * 2) != SrcElts)

      isa<ConstantSDNode>(LHS.getOperand(1))) {

    ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);

  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
  switch (KnownSize) {
    return RISCV::VRRegClassID;
    return RISCV::VRM2RegClassID;
    return RISCV::VRM4RegClassID;
    return RISCV::VRM8RegClassID;

    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;

    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;

    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  return RISCV::VRRegClassID;

std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");

  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {

      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,

  return {SubRegIdx, InsertExtractIdx};
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {

unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {

         "Unexpected opcode");

  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);

  return Op.getOperand(II->VLOperand + 1 + HasChain);
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);

         "Expected legal fixed length vector!");

  unsigned MaxELen = Subtarget.getELen();

  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());

         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");

         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
static std::pair<SDValue, SDValue>

static std::pair<SDValue, SDValue>

static std::pair<SDValue, SDValue>
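// These helpers return the {Mask, VL} operand pair that the RVV custom nodes
// take: an all-ones mask together with either VLMAX (for scalable types) or
// the fixed element count of the original type.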
                                          EVT VT, unsigned DefinedValues) const {

  std::tie(LMul, Fractional) =

    Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;

    Cost = (LMul * DLenFactor);
  MVT DstVT = Op.getSimpleValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

      Src.getValueType() == MVT::bf16) {

  else if (DstVT == MVT::i64 && SatVT == MVT::i32)

      Opc, DL, DstVT, Src,

  MVT SrcVT = Src.getSimpleValueType();

  if (SatVT != DstEltVT)

  if (SrcEltSize > (2 * DstEltSize))

  MVT DstContainerVT = DstVT;
  MVT SrcContainerVT = SrcVT;

         "Expected same element count");

      {Src, Src, DAG.getCondCode(ISD::SETNE),
       DAG.getUNDEF(Mask.getValueType()), Mask, VL});
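  // The SETNE compare of Src against itself is true exactly for the non-NaN
  // lanes; for the saturating conversions this mask is used to force NaN
  // inputs to produce zero.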
  if (DstEltSize > (2 * SrcEltSize)) {

  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
  case ISD::VP_FFLOOR:
  case ISD::VP_FROUND:
  MVT VT = Op.getSimpleValueType();

  MVT ContainerVT = VT;

  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);

                              DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  switch (Op.getOpcode()) {
  case ISD::VP_FFLOOR:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {

  case ISD::VP_FNEARBYINT:

                    Src, Src, Mask, VL);
  MVT VT = Op.getSimpleValueType();

  MVT ContainerVT = VT;

  MVT MaskVT = Mask.getSimpleValueType();

      {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
       DAG.getUNDEF(MaskVT), Mask, VL});

      {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
  Chain = Src.getValue(1);

                              DAG.getUNDEF(ContainerVT), MaxValNode, VL);

  switch (Op.getOpcode()) {

      {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});

                    DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);

                    DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,

                    DAG.getVTList(ContainerVT, MVT::Other), Chain,
                    Truncated, Mask, VL);

                 Src, Src, Mask, VL);
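// Both the plain and strict variants of the rounding lowerings work the same
// way: lanes whose magnitude is below the largest exactly representable
// integer (MaxValNode) are converted to integer and back under the required
// static rounding mode (FRM), while larger values and NaNs are passed through
// unchanged via the mask.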
  MVT VT = Op.getSimpleValueType();

  if (Merge.isUndef())

  if (Merge.isUndef())

  return std::nullopt;
  bool IsInteger = Op.getValueType().isInteger();

  std::optional<unsigned> SeqStepDenom;
  std::optional<int64_t> SeqStepNum, SeqAddend;
  std::optional<std::pair<uint64_t, unsigned>> PrevElt;
  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())

      if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);

      if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
        Val = *ExactInteger;
        return std::nullopt;

      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);

      int64_t Remainder = ValDiff % IdxDiff;
      if (Remainder != ValDiff) {
        return std::nullopt;

        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)
        return std::nullopt;

        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)
        return std::nullopt;

    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);

  if (!SeqStepNum || !SeqStepDenom)
    return std::nullopt;

  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())

    Val = Op.getConstantOperandVal(Idx) &
          maskTrailingOnes<uint64_t>(EltSizeInBits);

        cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),

        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);

    else if (Addend != SeqAddend)
      return std::nullopt;

  assert(SeqAddend && "Must have an addend if we have a step");

  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
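// Example: the constant vector <1, 4, 7, 10> is a VID sequence with
// StepNumerator = 3, StepDenominator = 1, Addend = 1, so it can be built as
// (vid.v * 3) + 1 instead of inserting each element. Fractional steps use the
// denominator, e.g. <0, 0, 1, 1> has step 1/2.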
  MVT ContainerVT = VT;

  MVT VT = Op.getSimpleValueType();

  unsigned MostCommonCount = 0;

  unsigned NumUndefElts =

  unsigned NumScalarLoads = 0;

    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];

    if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
      NumScalarLoads += !CFP->isExactlyValue(+0.0);

    if (++Count >= MostCommonCount) {
      MostCommonCount = Count;

  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;

      ((MostCommonCount > DominantValueCountThreshold) ||

      !LastOp.isUndef() && ValueCounts[LastOp] == 1 &&
      LastOp != DominantValue) {

    Processed.insert(LastOp);

    const SDValue &V = OpIdx.value();
    if (V.isUndef() || !Processed.insert(V).second)
    if (ValueCounts[V] == 1) {

  return DAG.getConstant(V == V1, DL, XLenVT);
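// When one value covers nearly all defined elements of a BUILD_VECTOR, it is
// splatted once as the dominant value and the few remaining elements are
// patched in individually, which is cheaper than inserting every element.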
  MVT VT = Op.getSimpleValueType();

    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());

      unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
      MVT IntegerViaVecVT =

      unsigned BitPos = 0, IntegerEltIdx = 0;

      for (unsigned I = 0; I < NumElts;) {

        bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
        Bits |= ((uint64_t)BitValue << BitPos);

        if (I % NumViaIntegerBits == 0 || I == NumElts) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64<32>(Bits);

          Elts[IntegerEltIdx] = Elt;

      if (NumElts < NumViaIntegerBits) {

        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
    int64_t StepNumerator = SimpleVID->StepNumerator;
    unsigned StepDenominator = SimpleVID->StepDenominator;
    int64_t Addend = SimpleVID->Addend;

    assert(StepNumerator != 0 && "Invalid step");
    bool Negate = false;
    int64_t SplatStepVal = StepNumerator;

    if (StepNumerator != 1 && StepNumerator != INT64_MIN &&

      Negate = StepNumerator < 0;

      SplatStepVal = Log2_64(std::abs(StepNumerator));

    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {

      MVT VIDContainerVT =

      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {

        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);

      if (StepDenominator != 1) {

      if (Addend != 0 || Negate) {
    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32) &&
           "Unexpected sequence type");

    unsigned ViaVecLen =

    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);

    for (const auto &OpIdx : enumerate(Op->op_values())) {
      const auto &SeqV = OpIdx.value();
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (OpIdx.index() * EltBitSize));

    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= 64) {
    unsigned SeqLen = Sequence.size();

    assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
            ViaIntVT == MVT::i64) &&
           "Unexpected sequence type");

    const unsigned RequiredVL = NumElts / SeqLen;
    const unsigned ViaVecLen =
        NumElts : RequiredVL;

    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);

    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (EltIdx * EltBitSize));

    if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
      SplatValue = SignExtend64<32>(SplatValue);

            (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
           "Unexpected bitcast sequence");

    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {

      MVT ViaContainerVT =

      if (ViaVecLen != RequiredVL)

  const unsigned ScalarSize =
      Op.getSimpleValueType().getScalarSizeInBits();
  if (ScalarSize > 8 && NumElts <= 4) {

    if (ScalarSize - SignBits < 8) {

                                        Source, DAG, Subtarget);
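  // A constant vector whose elements repeat a short pattern can be built by
  // packing one period of the pattern into a wider integer element and
  // splatting it, e.g. v8i8 <1, 2, 1, 2, ...> becomes a v4i16 splat of 0x0201
  // followed by a bitcast.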
  MVT VT = Op.getSimpleValueType();

         "Unexpected type for i1 splat value");

  unsigned UndefCount = 0;

      LinearBudget -= PerSlideCost;

      LinearBudget -= PerSlideCost;

    LinearBudget -= PerSlideCost;

  if (LinearBudget < 0)

         "Illegal type which will result in reserved encoding");

                     Vec, Offset, Mask, VL, Policy);

                     Vec, Offset, Mask, VL, Policy);

  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();

    if ((LoC >> 31) == HiC)

        (isa<RegisterSDNode>(VL) &&
         cast<RegisterSDNode>(VL)->getReg() == RISCV::X0))

    else if (isa<ConstantSDNode>(VL) &&
             isUInt<4>(cast<ConstantSDNode>(VL)->getZExtValue()))

        isa<ConstantSDNode>(Hi.getOperand(1)) &&
        Hi.getConstantOperandVal(1) == 31)
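// On RV32, an i64 splat is assembled from its Lo/Hi halves. If Hi is just the
// sign-extension of Lo ((LoC >> 31) == HiC, or Hi is Lo shifted right
// arithmetically by 31), a plain vmv.v.x of the 32-bit value already yields
// the sign-extended 64-bit splat.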
  assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");

  bool HasPassthru = Passthru && !Passthru.isUndef();
  if (!HasPassthru && !Passthru)

  if (Scalar.getValueType().bitsLE(XLenVT)) {

    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);

  assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
         "Unexpected scalar for splat lowering!");

         "Unexpected vector MVT");

    SDValue ExtractedVal = Scalar.getOperand(0);

    MVT ExtractedContainerVT = ExtractedVT;

                                           DAG, ExtractedContainerVT, Subtarget);

    if (ExtractedContainerVT.bitsLE(VT))

  if (!Scalar.getValueType().bitsLE(XLenVT))

                     VT, DL, DAG, Subtarget);

  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
  if (Src != V2.getOperand(0))

  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))

      V2.getConstantOperandVal(1) != Mask.size())

  if (Mask[0] != 0 && Mask[0] != 1)

  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)

  int Size = Mask.size();

  assert(Size == (int)NumElts && "Unexpected mask size");

  EvenSrc = StartIndexes[0];
  OddSrc = StartIndexes[1];

  if (EvenSrc != 0 && OddSrc != 0)

  int HalfNumElts = NumElts / 2;
  return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
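// Example: with 8-element sources, the mask <0, 8, 1, 9, 2, 10, 3, 11>
// interleaves the low halves of the two operands (EvenSrc = 0, OddSrc = 8)
// and can be lowered with the widening interleave sequence further below.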
  int Size = Mask.size();

  for (int i = 0; i != Size; ++i) {

    int StartIdx = i - (M % Size);

    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;

      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)

    int MaskSrc = M < Size ? 0 : 1;

    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;

      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)

  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((LoSrc >= 0 || HiSrc >= 0) &&
         "Failed to find a rotated input vector!");
  MVT ContainerVT = VT;

  assert(Src.getSimpleValueType().isFixedLengthVector());

  MVT SrcContainerVT =

  Src = DAG.getBitcast(WideSrcContainerVT, Src);

  unsigned Shift = EvenElts ? 0 : EltBits;

                            DAG.getUNDEF(IntContainerVT), TrueMask, VL);
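// Deinterleaving is done by viewing the source as a vector with elements
// twice as wide and using a narrowing shift (vnsrl): shift 0 extracts the
// even elements, shift EltBits extracts the odd ones.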
  auto findNonEXTRACT_SUBVECTORParent =
      [](SDValue Parent) -> std::pair<SDValue, uint64_t> {

           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);

    return std::make_pair(Parent, Offset);

  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);

  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)

    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;

      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;

  if (NewMask[0] <= 0)

  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])

  MVT SrcVT = Src.getSimpleValueType();
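// If, after looking through EXTRACT_SUBVECTORs, the mask reads a single
// source at consecutive increasing indices, the shuffle is simply a
// vslidedown of that source by the first index.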
  int NumSubElts, Index;

  bool OpsSwapped = Mask[Index] < (int)NumElts;
  SDValue InPlace = OpsSwapped ? V2 : V1;
  SDValue ToInsert = OpsSwapped ? V1 : V2;

  if (NumSubElts + Index >= (int)NumElts)

  Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,

  bool OpsSwapped = false;
  if (!isa<BuildVectorSDNode>(V1)) {
    if (!isa<BuildVectorSDNode>(V2))

  SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();

    const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
    for (unsigned i = S; i != E; ++i)
      if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)

  bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
  if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))

  const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];

  if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)

  auto OpCode = IsVSlidedown ?

  auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
                         Splat, TrueMask, VL);
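// A shuffle that shifts one operand by a single lane and fills the vacated
// slot with a splatted scalar matches vslide1up.vx / vslide1down.vx, saving a
// separate insert after the slide.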
  MVT VecContainerVT = VecVT;

  MVT WideContainerVT = WideVT;

  EvenV = DAG.getBitcast(VecContainerVT, EvenV);

                     EvenV, OddV, Passthru, Mask, VL);
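// The interleave of EvenV and OddV is computed in a type with double-width
// elements: EvenV occupies the low half of each wide element and OddV the
// high half, so a bitcast back to the narrow type yields the interleaved
// vector.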