#include "llvm/IR/IntrinsicsRISCV.h"
#define DEBUG_TYPE "riscv-lower"
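// The cl::desc strings that follow belong to command-line options whose
// declarations are elided in this excerpt. They tune lowering heuristics:
// the maximum web size considered when forming widening (VW) vector ops,
// whether VW_W ops may be formed with splat constants, how many repeated FP
// divisors justify the reciprocal transform, and the instruction budget for
// materializing a floating-point immediate.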
    cl::desc("Give the maximum size (in number of nodes) of the web of "
             "instructions that we will consider for VW expansion"),
    cl::desc("Allow the formation of VW_W operations (e.g., "
             "VWADD_W) with splat constants"),
    cl::desc("Set the minimum number of repetitions of a divisor to allow "
             "transformation to multiplications by the reciprocal"),
    cl::desc("Give the maximum number of instructions that we will "
             "use for creating a floating-point immediate value"),
  if (Subtarget.isRV32E())
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
               "doesn't support the F instruction set extension (ignoring "
      !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
               "doesn't support the D instruction set extension (ignoring "
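  // When a hard-float ABI is requested without the matching F or D extension,
  // the constructor only emits a warning rather than failing; the requested
  // ABI is presumably dropped in favor of the corresponding soft-float ABI.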
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtD())
  auto addRegClassForRVV = [this](MVT VT) {
    if (VT.getVectorMinNumElements() < MinElts)
    unsigned Size = VT.getSizeInBits().getKnownMinValue();
      RC = &RISCV::VRRegClass;
      RC = &RISCV::VRM2RegClass;
      RC = &RISCV::VRM4RegClass;
      RC = &RISCV::VRM8RegClass;
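  // addRegClassForRVV picks a register class by the type's known-minimum size
  // (i.e. by LMUL): types that fit in a single vector register use VR, twice
  // that VRM2, then VRM4 and VRM8.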
    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 &&
      addRegClassForRVV(VT);
    for (MVT VT : F16VecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : F32VecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : F64VecVTs)
      addRegClassForRVV(VT);
    auto addRegClassForFixedVectors = [this](MVT VT) {
    if (useRVVForFixedLengthVectorVT(VT))
      addRegClassForFixedVectors(VT);
    if (useRVVForFixedLengthVectorVT(VT))
      addRegClassForFixedVectors(VT);
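    // Fixed-length vector types are only given RVV register classes when
    // useRVVForFixedLengthVectorVT decides they fit the configured
    // implementation (VLEN/ELEN); everything else is left to the default
    // legalization.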
  if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
      {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
  if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
  if (!Subtarget.hasStdExtM()) {
  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
      Subtarget.hasVendorXTHeadBb()) {
      (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
       Subtarget.hasVendorXTHeadBb())
  if (Subtarget.hasStdExtZbb()) {
  if (Subtarget.hasVendorXTHeadBb()) {
  if (!Subtarget.hasVendorXVentanaCondOps() &&
      !Subtarget.hasVendorXTHeadCondMov())
  static const unsigned FPLegalNodeTypes[] = {
  static const unsigned FPOpToExpand[] = {
  static const unsigned FPRndMode[] = {
  if (Subtarget.hasStdExtZfh()) {
  static const unsigned ZfhminPromoteOps[] = {
  if (Subtarget.hasStdExtF()) {
  if (Subtarget.hasStdExtZfa())
  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
  if (Subtarget.hasStdExtD()) {
  if (Subtarget.hasStdExtZfa()) {
  if (Subtarget.hasStdExtF()) {
  if (Subtarget.hasStdExtA()) {
  } else if (Subtarget.hasForcedAtomics()) {
  static const unsigned IntegerVPOps[] = {
      ISD::VP_ADD,         ISD::VP_SUB,         ISD::VP_MUL,
      ISD::VP_SDIV,        ISD::VP_UDIV,        ISD::VP_SREM,
      ISD::VP_UREM,        ISD::VP_AND,         ISD::VP_OR,
      ISD::VP_XOR,         ISD::VP_ASHR,        ISD::VP_LSHR,
      ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
      ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
      ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
      ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FP_TO_SINT,
      ISD::VP_FP_TO_UINT,  ISD::VP_SETCC,       ISD::VP_SIGN_EXTEND,
      ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE,    ISD::VP_SMIN,
      ISD::VP_SMAX,        ISD::VP_UMIN,        ISD::VP_UMAX,
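  // IntegerVPOps collects the vector-predicated (VP) integer opcodes; the
  // per-type loops below appear to mark each of them as Custom for every
  // legal RVV integer vector type, so they can be lowered onto masked RVV
  // instructions with an explicit vector length.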
  static const unsigned FloatingPointVPOps[] = {
      ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
      ISD::VP_FDIV,        ISD::VP_FNEG,        ISD::VP_FABS,
      ISD::VP_FMA,         ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
      ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
      ISD::VP_SELECT,      ISD::VP_SINT_TO_FP,  ISD::VP_UINT_TO_FP,
      ISD::VP_SETCC,       ISD::VP_FP_ROUND,    ISD::VP_FP_EXTEND,
      ISD::VP_SQRT,        ISD::VP_FMINNUM,     ISD::VP_FMAXNUM,
      ISD::VP_FCEIL,       ISD::VP_FFLOOR,      ISD::VP_FROUND,
      ISD::VP_FROUNDEVEN,  ISD::VP_FCOPYSIGN,   ISD::VP_FROUNDTOZERO,
      ISD::VP_FRINT,       ISD::VP_FNEARBYINT};
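  // FloatingPointVPOps is the floating-point counterpart of the table above,
  // covering VP arithmetic, reductions, conversions, and rounding operations.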
  static const unsigned IntegerVecReduceOps[] = {
  static const unsigned FloatingPointVecReduceOps[] = {
       ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
       ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
       ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
    for (MVT VT : BoolVecVTs) {
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
           ISD::VP_TRUNCATE, ISD::VP_SETCC},
    for (MVT VT : IntVecVTs) {
      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
           ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
    const auto SetCommonVFPActions = [&](MVT VT) {
          {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
    const auto SetCommonVFPExtLoadTruncStoreActions =
        for (auto SmallVT : SmallerVTs) {
    for (MVT VT : F16VecVTs) {
      SetCommonVFPActions(VT);
    for (MVT VT : F32VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
    for (MVT VT : F64VecVTs) {
      SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
      if (!useRVVForFixedLengthVectorVT(VT))
          {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
           ISD::VP_SETCC, ISD::VP_TRUNCATE},
           ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
      if (!useRVVForFixedLengthVectorVT(VT))
           ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
           ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtD())
  if (Subtarget.hasForcedAtomics()) {
  if (Subtarget.hasVendorXTHeadMemIdx()) {
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasStdExtZbb())
  if (Subtarget.hasStdExtZbs() && Subtarget.is64Bit())
  if (Subtarget.hasStdExtZbkb())
  if (Subtarget.hasStdExtF())
  if (Subtarget.hasVendorXTHeadMemPair())
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
                                             unsigned Intrinsic) const {
  auto &DL = I.getModule()->getDataLayout();
  switch (Intrinsic) {
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    Info.ptrVal = I.getArgOperand(0);
  case Intrinsic::riscv_masked_strided_load:
    Info.ptrVal = I.getArgOperand(1);
    Info.align =
        Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
  case Intrinsic::riscv_masked_strided_store:
    Info.ptrVal = I.getArgOperand(1);
        DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
  case Intrinsic::riscv_seg2_load:
  case Intrinsic::riscv_seg3_load:
  case Intrinsic::riscv_seg4_load:
  case Intrinsic::riscv_seg5_load:
  case Intrinsic::riscv_seg6_load:
  case Intrinsic::riscv_seg7_load:
  case Intrinsic::riscv_seg8_load:
    Info.ptrVal = I.getArgOperand(0);
        getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
            I.getType()->getStructElementType(0)->getScalarType()) /
  case Intrinsic::riscv_seg2_store:
  case Intrinsic::riscv_seg3_store:
  case Intrinsic::riscv_seg4_store:
  case Intrinsic::riscv_seg5_store:
  case Intrinsic::riscv_seg6_store:
  case Intrinsic::riscv_seg7_store:
  case Intrinsic::riscv_seg8_store:
    Info.ptrVal = I.getArgOperand(I.getNumOperands() - 3);
        DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
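  // getTgtMemIntrinsic fills in the memory operand info for the RISC-V memory
  // intrinsics: the pointer is argument 0 for the masked AMOs and segment
  // loads, argument 1 for the strided load/store, and the third-from-last
  // argument for segment stores; for the strided and segment accesses the
  // alignment is derived from the scalar element size.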
  return isInt<12>(Imm);
  return isInt<12>(Imm);
  return (SrcBits == 64 && DestBits == 32);
  return (SrcBits == 64 && DestBits == 32);
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
  return Subtarget.hasStdExtZbb();
  return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
  if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
  EVT VT = Y.getValueType();
  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
         !isa<ConstantSDNode>(Y);
  if (Subtarget.hasStdExtZbs())
    return X.getValueType().isScalarInteger();
  auto *C = dyn_cast<ConstantSDNode>(Y);
  if (Subtarget.hasVendorXTHeadBs())
    return C != nullptr;
  return C && C->getAPIntValue().ule(10);
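  // Cost and legality hooks sampled above: 12-bit signed immediates are legal
  // for add/compare; masked-bit folds want a power-of-two mask that does not
  // fit 12 bits; not-operand folds (ANDN/ORN-style) only help with Zbb/Zbkb
  // when the inverted operand is not a constant; and single-bit tests are
  // cheap with Zbs for any scalar integer, with XTheadBs for any constant bit
  // index, and otherwise only for constant bit positions no higher than 10.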
  if (BitSize > Subtarget.getXLen())
  int64_t Val = Imm.getSExtValue();
  if (!Subtarget.enableUnalignedScalarMem())
    unsigned OldShiftOpcode, unsigned NewShiftOpcode,
  if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
  if (NewShiftOpcode == ISD::SRL && CC->isOne())
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
    return Operand == 1;
  auto *II = dyn_cast<IntrinsicInst>(I);
  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
    return Operand == 1;
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  for (auto OpIdx : enumerate(I->operands())) {
    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
    for (Use &U : Op->uses()) {
  if (!Subtarget.hasStdExtZfa())
  bool IsSupportedVT = false;
    IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
    IsSupportedVT = true;
    assert(Subtarget.hasStdExtD() && "Expect D extension");
    IsSupportedVT = true;
                                       bool ForCodeSize) const {
  bool IsLegalVT = false;
    IsLegalVT = Subtarget.hasStdExtF();
    IsLegalVT = Subtarget.hasStdExtD();
    return Imm.isZero();
  int Cost = Imm.isNegZero()
                              Subtarget.getFeatureBits());
                                  unsigned Index) const {
  if ((ResElts * 2) != SrcElts)
  if (VT == MVT::f16 && Subtarget.hasStdExtF() &&
  if (VT == MVT::f16 && Subtarget.hasStdExtF() &&
      isa<ConstantSDNode>(LHS.getOperand(1))) {
      ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask);
  if (auto *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t C = RHSC->getSExtValue();
  switch (KnownSize) {
    return RISCV::VRRegClassID;
    return RISCV::VRM2RegClassID;
    return RISCV::VRM4RegClassID;
    return RISCV::VRM8RegClassID;
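// The switch on KnownSize maps a vector type's known-minimum bit width to the
// matching RVV register class ID (VR, VRM2, VRM4, or VRM8), mirroring the
// LMUL-based register class selection done in the constructor.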
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
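// The static_asserts guarantee that the sub_vrm* subregister indices are
// numbered contiguously, so the index of the Nth LMUL-1/2/4 slice can be
// computed as sub_vrm*_0 + Index instead of going through a lookup table.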
  return RISCV::VRRegClassID;
std::pair<unsigned, unsigned>
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
  return {SubRegIdx, InsertExtractIdx};
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
         "Unexpected opcode");
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  return Op.getOperand(II->VLOperand + 1 + HasChain);
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
         "Expected legal fixed length vector!");
  unsigned MaxELen = Subtarget.getELEN();
  return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
         "Expected to convert into a scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected a fixed length vector operand!");
         "Expected to convert into a fixed length vector!");
  assert(V.getValueType().isScalableVector() &&
         "Expected a scalable vector operand!");
static std::pair<SDValue, SDValue>
static std::pair<SDValue, SDValue>
static std::pair<SDValue, SDValue>
    EVT VT, unsigned DefinedValues) const {
  SDValue Src = Op.getOperand(0);
  MVT DstVT = Op.getSimpleValueType();
  EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  if (Src.getSimpleValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) {
                     Opc, DL, DstVT, Src,
  MVT SrcVT = Src.getSimpleValueType();
  if (SatVT != DstEltVT)
  if (SrcEltSize > (2 * DstEltSize))
  MVT DstContainerVT = DstVT;
  MVT SrcContainerVT = SrcVT;
         "Expected same element count");
                     {Src, Src, DAG.getCondCode(ISD::SETNE),
                      DAG.getUNDEF(Mask.getValueType()), Mask, VL});
  if (DstEltSize > (2 * SrcEltSize)) {
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO:
  case ISD::VP_FFLOOR:
  case ISD::VP_FROUND:
  MVT VT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  MVT ContainerVT = VT;
  if (Op->isVPOpcode()) {
    Mask = Op.getOperand(1);
    VL = Op.getOperand(2);
                             DAG.getUNDEF(ContainerVT), MaxValNode, VL);
  switch (Op.getOpcode()) {
  case ISD::VP_FFLOOR:
  case ISD::VP_FROUND:
  case ISD::VP_FROUNDEVEN:
  case ISD::VP_FROUNDTOZERO: {
  case ISD::VP_FNEARBYINT:
    if (Op.getOpcode() != ISD::VP_FNEARBYINT)
                      Src, Src, Mask, VL);
  MVT VT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
    return std::nullopt;
  unsigned NumElts = Op.getNumOperands();
  bool IsInteger = Op.getValueType().isInteger();
  std::optional<unsigned> SeqStepDenom;
  std::optional<int64_t> SeqStepNum, SeqAddend;
  std::optional<std::pair<uint64_t, unsigned>> PrevElt;
  unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())
      if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);
      if (!isa<ConstantFPSDNode>(Op.getOperand(Idx)))
        return std::nullopt;
          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
      Val = *ExactInteger;
        return std::nullopt;
      unsigned IdxDiff = Idx - PrevElt->second;
      int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
      int64_t Remainder = ValDiff % IdxDiff;
      if (Remainder != ValDiff) {
          return std::nullopt;
        SeqStepNum = ValDiff;
      else if (ValDiff != SeqStepNum)
        return std::nullopt;
        SeqStepDenom = IdxDiff;
      else if (IdxDiff != *SeqStepDenom)
        return std::nullopt;
    if (!PrevElt || PrevElt->first != Val)
      PrevElt = std::make_pair(Val, Idx);
  if (!SeqStepNum || !SeqStepDenom)
    return std::nullopt;
  for (unsigned Idx = 0; Idx < NumElts; Idx++) {
    if (Op.getOperand(Idx).isUndef())
      Val = Op.getConstantOperandVal(Idx) &
            maskTrailingOnes<uint64_t>(EltSizeInBits);
          cast<ConstantFPSDNode>(Op.getOperand(Idx))->getValueAPF(),
        (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
    int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
    else if (Addend != SeqAddend)
      return std::nullopt;
  assert(SeqAddend && "Must have an addend if we have a step");
  return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
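// This matcher recognizes BUILD_VECTORs whose elements follow
// element[i] = (StepNumerator * i) / StepDenominator + Addend, so they can be
// generated from a vid.v index sequence plus a multiply/shift and an add. For
// example, the constant vector {1, 3, 5, 7} is matched with StepNumerator = 2,
// StepDenominator = 1 and Addend = 1.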
  MVT ContainerVT = VT;
  MVT VT = Op.getSimpleValueType();
  unsigned NumElts = Op.getNumOperands();
    unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
    NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
      MVT IntegerViaVecVT =
      unsigned BitPos = 0, IntegerEltIdx = 0;
      for (unsigned I = 0; I < NumElts; I++, BitPos++) {
        if (I != 0 && I % NumViaIntegerBits == 0) {
          if (NumViaIntegerBits <= 32)
            Bits = SignExtend64<32>(Bits);
        bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
        Bits |= ((uint64_t)BitValue << BitPos);
      if (NumViaIntegerBits <= 32)
        Bits = SignExtend64<32>(Bits);
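      // Constant i1 BUILD_VECTORs are packed bit-by-bit into integer elements
      // of up to XLEN/ELEN bits, materialized as an integer vector, and then
      // bitcast back to the mask vector type.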
      if (NumElts < NumViaIntegerBits) {
        assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
      assert(Splat.getValueType() == XLenVT &&
             "Unexpected type for i1 splat value");
    int64_t StepNumerator = SimpleVID->StepNumerator;
    unsigned StepDenominator = SimpleVID->StepDenominator;
    int64_t Addend = SimpleVID->Addend;
    assert(StepNumerator != 0 && "Invalid step");
    bool Negate = false;
    int64_t SplatStepVal = StepNumerator;
    if (StepNumerator != 1) {
      Negate = StepNumerator < 0;
      SplatStepVal = Log2_64(std::abs(StepNumerator));
    if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
         (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
        (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
      MVT VIDContainerVT =
      if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
          (StepOpcode == ISD::SHL && SplatStepVal != 0)) {
        VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
      if (StepDenominator != 1) {
      if (Addend != 0 || Negate) {
  const auto *BV = cast<BuildVectorSDNode>(Op);
  if (VT.isInteger() && EltBitSize < 64 &&
      BV->getRepeatedSequence(Sequence) &&
      (Sequence.size() * EltBitSize) <= 64) {
    unsigned SeqLen = Sequence.size();
           "Unexpected sequence type");
    unsigned EltIdx = 0;
    uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
    for (const auto &SeqV : Sequence) {
      if (!SeqV.isUndef())
        SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
                       << (EltIdx * EltBitSize));
    SplatValue = SignExtend64<32>(SplatValue);
           "Unexpected bitcast sequence");
    if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
      MVT ViaContainerVT =
  unsigned MostCommonCount = 0;
  unsigned NumUndefElts =
      count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
  unsigned NumScalarLoads = 0;
  for (SDValue V : Op->op_values()) {
    ValueCounts.insert(std::make_pair(V, 0));
    unsigned &Count = ValueCounts[V];
    if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
      NumScalarLoads += !CFP->isExactlyValue(+0.0);
    if (++Count >= MostCommonCount) {
      MostCommonCount = Count;
  assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
  unsigned NumDefElts = NumElts - NumUndefElts;
  unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
      ((MostCommonCount > DominantValueCountThreshold) ||
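  // Dominant-value lowering: a BUILD_VECTOR whose most common defined value
  // covers all but (roughly) two of the defined elements is emitted as a
  // splat of that value followed by scalar inserts for the outliers; non-zero
  // FP constants are counted as scalar loads when weighing this choice.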
    for (const auto &OpIdx : enumerate(Op->ops())) {
      const SDValue &V = OpIdx.value();
      if (V.isUndef() || !Processed.insert(V).second)
      if (ValueCounts[V] == 1) {
          return DAG.getConstant(V == V1, DL, XLenVT);
  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    if ((LoC >> 31) == HiC)
    auto *Const = dyn_cast<ConstantSDNode>(VL);
    if (LoC == HiC && Const && Const->isAllOnes()) {
  bool HasPassthru = Passthru && !Passthru.isUndef();
  if (!HasPassthru && !Passthru)
  if (Scalar.getValueType().bitsLE(XLenVT)) {
    Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
        (!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
           "Unexpected scalar for splat lowering!");
         "Unexpected vector MVT");
  auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
                           DAG.getUNDEF(InnerVT), Scalar, VL);
  if (!Scalar.getValueType().bitsLE(XLenVT))
                            VT, DL, DAG, Subtarget);
  Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
    if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
  auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
                           DAG.getUNDEF(InnerVT), Scalar, VL);
  if (Src != V2.getOperand(0))
  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
      V2.getConstantOperandVal(1) != Mask.size())
  if (Mask[0] != 0 && Mask[0] != 1)
  for (unsigned i = 1; i != Mask.size(); ++i)
    if (Mask[i] != Mask[i - 1] + 2)
  int Size = Mask.size();
  EvenSrc = StartIndexes[0] % 2 ? StartIndexes[1] : StartIndexes[0];
  OddSrc = StartIndexes[0] % 2 ? StartIndexes[0] : StartIndexes[1];
  if (EvenSrc != 0 && OddSrc != 0)
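// Shuffle classification helpers: a deinterleaving shuffle reads every second
// element (mask values stepping by 2 from a start of 0 or 1), while an
// interleaving shuffle is recognized by working out which source supplies the
// even result elements and which supplies the odd ones.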
  int Size = Mask.size();
  for (int i = 0; i != Size; ++i) {
    int StartIdx = i - (M % Size);
    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
    int MaskSrc = M < Size ? 0 : 1;
    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((LoSrc >= 0 || HiSrc >= 0) &&
         "Failed to find a rotated input vector!");
  MVT ContainerVT = VT;
  assert(Src.getSimpleValueType().isFixedLengthVector());
  MVT SrcContainerVT =
  Src = DAG.getBitcast(WideSrcContainerVT, Src);
  unsigned Shift = EvenElts ? 0 : EltBits;
                             DAG.getUNDEF(IntContainerVT), TrueMask, VL);
  if (Merge.isUndef())
  if (Merge.isUndef())
  auto findNonEXTRACT_SUBVECTORParent =
      [](SDValue Parent) -> std::pair<SDValue, uint64_t> {
           Parent.getOperand(0).getSimpleValueType().isFixedLengthVector()) {
      Offset += Parent.getConstantOperandVal(1);
      Parent = Parent.getOperand(0);
    return std::make_pair(Parent, Offset);
  auto [V1Src, V1IndexOffset] = findNonEXTRACT_SUBVECTORParent(V1);
  auto [V2Src, V2IndexOffset] = findNonEXTRACT_SUBVECTORParent(V2);
  for (size_t i = 0; i != NewMask.size(); ++i) {
    if (NewMask[i] == -1)
    if (static_cast<size_t>(NewMask[i]) < NewMask.size()) {
      NewMask[i] = NewMask[i] + V1IndexOffset;
      NewMask[i] = NewMask[i] - NewMask.size() + V2IndexOffset;
  if (NewMask[0] <= 0)
  for (unsigned i = 1; i != NewMask.size(); ++i)
    if (NewMask[i - 1] + 1 != NewMask[i])
  MVT SrcVT = Src.getSimpleValueType();
  MVT VecContainerVT = VecVT;
  MVT WideContainerVT = WideVT;
    EvenV = DAG.getBitcast(VecContainerVT, EvenV);
                              EvenV, OddV, Passthru, Mask, VL);
                               AllOnesVec, Passthru, Mask, VL);
                              OddsMul, Passthru, Mask, VL);
  Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  MVT VT = Op.getSimpleValueType();
          V.getOperand(0).getSimpleValueType().getVectorNumElements();
      V = V.getOperand(Offset / OpElements);
      auto *Ld = cast<LoadSDNode>(V);
        SDValue Ops[] = {Ld->getChain(),
        V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                        Ld->getPointerInfo().getWithOffset(Offset),
                        Ld->getOriginalAlign(),
                           Ld->getPointerInfo().getWithOffset(Offset), SVT,
                           Ld->getOriginalAlign(),
                           Ld->getMemOperand()->getFlags());
    assert(Lane < (int)NumElts && "Unexpected lane!");
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
    LoV = LoSrc == 0 ? V1 : V2;
    HiV = HiSrc == 0 ? V1 : V2;
    unsigned InvRotate = NumElts - Rotation;
      Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
    int EvenSrc, OddSrc;
    int Size = Mask.size();
    assert(EvenSrc >= 0 && "Undef source?");
    EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
    assert(OddSrc >= 0 && "Undef source?");
    OddV = (OddSrc / Size) == 0 ? V1 : V2;
    int MaskIndex = MaskIdx.value();
    return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
  assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
  bool InvertMask = IsSelect == SwapOps;
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
    bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
    GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
        IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
    if (IsLHSOrUndefIndex && MaskIndex >= 0)
      ++LHSIndexCounts[MaskIndex];
    if (!IsLHSOrUndefIndex)
      ++RHSIndexCounts[MaskIndex - NumElts];
  std::swap(GatherIndicesLHS, GatherIndicesRHS);
  assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
  MVT IndexContainerVT =
    if (LHSIndexCounts.size() == 1) {
      int SplatIndex = LHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
                           DAG.getUNDEF(ContainerVT), TrueMask, VL);
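  // General two-source shuffles fall back to vrgather: the mask is split into
  // per-source index vectors, each source is gathered (using the .vx splat
  // form when every index into that source is identical, otherwise the .vv
  // form), and the two gathers are then combined under the select mask built
  // above.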
  if (!V2.isUndef()) {
    if (RHSIndexCounts.size() == 1) {
      int SplatIndex = RHSIndexCounts.begin()->getFirst();
      Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
      Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
  MVT VT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
         "Expected legal float type!");
  if (FloatVT.bitsGT(VT)) {
  MVT ContainerVT = VT;
    MVT ContainerFloatVT =
                            Src, Mask, RTZRM, VL);
  unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
  else if (IntVT.bitsGT(VT))
  unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
  unsigned Adjust = ExponentBias + (EltSize - 1);
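// lowerCTLZ_CTTZ_ZERO_UNDEF counts leading/trailing zeros by converting to
// floating point and reading the biased exponent: shifting right by 23 (f32)
// or 52 (f64) exposes the exponent field, and subtracting the bias (127 or
// 1023) recovers the bit position. For example, for a cttz-style query on
// x = 24, isolating the lowest set bit gives 8, whose float exponent field is
// 127 + 3, so removing the bias yields the answer 3.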
  auto *Load = cast<LoadSDNode>(Op);
  assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
          Load->getMemoryVT(),
          *Load->getMemOperand()))
  MVT VT = Op.getSimpleValueType();
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV load type");
         "Expecting equally-sized RVV vector types to be legal");
      Load->getPointerInfo(), Load->getOriginalAlign(),
      Load->getMemOperand()->getFlags());
  auto *Store = cast<StoreSDNode>(Op);
  assert(Store && Store->getValue().getValueType().isVector() &&
         "Expected vector store");
          Store->getMemoryVT(),
          *Store->getMemOperand()))
  assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
         "Unexpected unaligned RVV store type");
         "Expecting equally-sized RVV vector types to be legal");
  StoredVal = DAG.getBitcast(NewVT, StoredVal);
                      Store->getPointerInfo(), Store->getOriginalAlign(),
                      Store->getMemOperand()->getFlags());
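// These two routines handle RVV loads and stores whose alignment is below the
// element size: when the target tolerates the access at all, the value is
// bitcast to an equally-sized vector of narrower (presumably byte) elements
// and the memory operation is re-emitted with the original alignment and
// memory-operand flags.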
  int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
  if (Subtarget.hasStdExtZtso()) {
  switch (Op.getOpcode()) {
    return lowerGlobalAddress(Op, DAG);
    return lowerBlockAddress(Op, DAG);
    return lowerConstantPool(Op, DAG);
    return lowerJumpTable(Op, DAG);
    return lowerGlobalTLSAddress(Op, DAG);
    return lowerSELECT(Op, DAG);
    return lowerBRCOND(Op, DAG);
    return lowerVASTART(Op, DAG);
    return lowerFRAMEADDR(Op, DAG);
    return lowerRETURNADDR(Op, DAG);
    return lowerShiftLeftParts(Op, DAG);
    return lowerShiftRightParts(Op, DAG, true);
    return lowerShiftRightParts(Op, DAG, false);
    EVT VT = Op.getValueType();
    SDValue Op0 = Op.getOperand(0);
        Subtarget.hasStdExtF()) {
        Subtarget.hasStdExtZfa()) {
           "Unexpected types");
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
    return LowerINTRINSIC_VOID(Op, DAG);
    MVT VT = Op.getSimpleValueType();
    assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
    if (!Op.getSimpleValueType().isVector())
    return lowerVectorTruncLike(Op, DAG);
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, 1);
    if (Op.getOperand(0).getValueType().isVector() &&
        Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
      return lowerVectorMaskExt(Op, DAG, -1);
    return lowerSPLAT_VECTOR_PARTS(Op, DAG);
    return lowerINSERT_VECTOR_ELT(Op, DAG);
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
    MVT VT = Op.getSimpleValueType();
    uint64_t Val = Op.getConstantOperandVal(0);
        Op.getOperand(1).getValueType() == MVT::i32) {
    if (!Op.getValueType().isVector())
    return lowerVectorFPExtendOrRoundLike(Op, DAG);
    return lowerStrictFPExtend(Op, DAG);
    MVT VT = Op.getSimpleValueType();
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
           "Unexpected vector element types");
      if (EltSize > (2 * SrcEltSize)) {
        return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
        assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
        return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
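    // Vector int<->FP conversions whose result elements are more than twice
    // as wide as the source elements are done in two steps: the source is
    // first extended (an integer extend, or an f16->f32 fp_extend in the
    // FP_TO_[US]INT case) and the original conversion opcode is re-issued on
    // the widened value.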
      if (SrcEltSize > (2 * EltSize)) {
    unsigned RVVOpc = 0;
    switch (Op.getOpcode()) {