#include "llvm/IR/IntrinsicsRISCV.h"

#define DEBUG_TYPE "riscvtti"
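// Cost-model tuning knobs (cl::opt fragments below): the LMUL assumed for
// getRegisterBitWidth queries (which also drives the LMUL used by
// autovectorized code), an override for the getMaximumVF result consumed by
// the SLP vectorizer, and the minimum trip count at which tail-folded
// vectorization is attempted.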
28 "riscv-v-register-bit-width-lmul",
30 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
31 "by autovectorized code. Fractional LMULs are not supported."),
37 "Overrides result used for getMaximumVF query which is used "
38 "exclusively by SLP vectorizer."),
43 cl::desc(
"Set the lower bound of a trip count to decide on "
44 "vectorization while tail-folding."),
  size_t NumInstr = OpCodes.size();
  return LMULCost * NumInstr;
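  // Refinement sketch: beyond the generic "LMUL x number of instructions"
  // estimate above, the per-opcode loop below special-cases gathers, slides,
  // reductions, and whole-mask ops, whose cost does not simply scale with
  // LMUL.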
  for (auto Op : OpCodes) {
    case RISCV::VRGATHER_VI:
    case RISCV::VRGATHER_VV:
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
    case RISCV::VFREDOSUM_VS: {
    case RISCV::VFMV_F_S:
    case RISCV::VFMV_S_F:
    case RISCV::VMXOR_MM:
    case RISCV::VMAND_MM:
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");

  if (!BO || !BO->hasOneUse())
  if (BO->getOpcode() != Instruction::Shl)
  if (ShAmt == Trailing)

  if (!Cmp || !Cmp->isEquality())
  if ((CmpC & Mask) != CmpC)
  return NewCmpC >= -2048 && NewCmpC <= 2048;
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");
  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;
  case Instruction::GetElementPtr:
  case Instruction::Store: {
    if (Idx == 1 || !Inst)
    if (!getTLI()->allowsMemoryAccessForAlignment(
  case Instruction::Load:
  case Instruction::And:
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
    if (Imm == UINT64_C(0xffffffff) &&
        ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
    if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
    Takes12BitImm = true;
  case Instruction::Add:
    Takes12BitImm = true;
  case Instruction::Or:
  case Instruction::Xor:
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Mul:
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;

  if (Imm.getSignificantBits() <= 64 &&

  return ST->hasVInstructions();
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,

  if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
      Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
      InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||

         getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);
  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:

  if (ST->hasVInstructions())

  if (ST->hasVInstructions())
    if (unsigned MinVLen = ST->getRealMinVLen();

        ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);

      (ST->hasVInstructions() &&

RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,
  unsigned Size = Mask.size();
  for (unsigned I = 0; I != Size; ++I) {
    if (static_cast<unsigned>(Mask[I]) == I)
    for (unsigned J = I + 1; J != Size; ++J)
      if (static_cast<unsigned>(Mask[J]) != J % I)
456 "Expected fixed vector type and non-empty mask");
459 unsigned NumOfDests =
divideCeil(Mask.size(), LegalNumElts);
463 if (NumOfDests <= 1 ||
465 Tp->getElementType()->getPrimitiveSizeInBits() ||
466 LegalNumElts >= Tp->getElementCount().getFixedValue())
469 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
472 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
476 unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
477 unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
478 unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
480 assert(NormalizedVF >= Mask.size() &&
481 "Normalized mask expected to be not shorter than original mask.");
486 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
487 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
490 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
493 Cost +=
TTI.getShuffleCost(
496 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
498 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
499 Cost +=
TTI.getShuffleCost(
502 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
  if (!VLen || Mask.empty())

  LegalVT = TTI.getTypeLegalizationCost(

  if (NumOfDests <= 1 ||
          Tp->getElementType()->getPrimitiveSizeInBits() ||

  unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
  unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);

  unsigned NormalizedVF =

  assert(NormalizedVF >= Mask.size() &&
         "Normalized mask expected to be not shorter than original mask.");

      NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
      [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
        if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
            SingleOpTy, RegMask, CostKind, 0, nullptr);
      [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
            SingleOpTy, RegMask, CostKind, 0, nullptr);

  if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
      (NumOfDestRegs <= 2 && NumShuffles < 4))
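  // Slide-based shuffle lowering: a shuffle that can be expressed as at most
  // two vslideup/vslidedown operations (immediate form when the slide amount
  // fits in uimm5, otherwise the VX form) plus a VMERGE_VVM to blend in the
  // second source is costed from those instructions directly below.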
  if (!LT.second.isFixedLengthVector())

  auto GetSlideOpcode = [&](int SlideAmt) {
    bool IsVI = isUInt<5>(std::abs(SlideAmt));
    return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
    return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;

  std::array<std::pair<int, int>, 2> SrcInfo;
  if (SrcInfo[1].second == 0)

  if (SrcInfo[0].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
    FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);

  if (SrcInfo[1].first == -1)
    return FirstSlideCost;

  if (SrcInfo[1].second != 0) {
    unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
    SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);

      getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);

  return FirstSlideCost + SecondSlideCost + MaskCost;
654 "Expected the Mask to match the return size if given");
656 "Expected the same scalar types");
665 FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
667 *
this, LT.second, ST->getRealVLen(),
669 if (VRegSplittingCost.
isValid())
670 return VRegSplittingCost;
675 if (Mask.size() >= 2) {
676 MVT EltTp = LT.second.getVectorElementType();
687 return 2 * LT.first * TLI->getLMULCost(LT.second);
689 if (Mask[0] == 0 || Mask[0] == 1) {
693 if (
equal(DeinterleaveMask, Mask))
694 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
699 if (LT.second.getScalarSizeInBits() != 1 &&
702 unsigned NumSlides =
Log2_32(Mask.size() / SubVectorSize);
704 for (
unsigned I = 0;
I != NumSlides; ++
I) {
705 unsigned InsertIndex = SubVectorSize * (1 <<
I);
710 std::pair<InstructionCost, MVT> DestLT =
715 Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
729 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
730 LT.second.getVectorNumElements() <= 256)) {
735 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second,
CostKind);
749 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
750 LT.second.getVectorNumElements() <= 256)) {
751 auto &
C = SrcTy->getContext();
752 auto EC = SrcTy->getElementCount();
757 return 2 * IndexCost +
758 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
777 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
805 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
806 if (std::optional<unsigned> VLen = ST->getRealVLen();
807 VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
808 SubLT.second.getSizeInBits() <= *VLen)
816 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second,
CostKind);
823 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second,
CostKind);
835 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
840 Instruction::InsertElement);
841 if (LT.second.getScalarSizeInBits() == 1) {
849 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
862 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
863 RISCV::VMV_X_S, RISCV::VMV_V_X,
872 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second,
CostKind);
878 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second,
CostKind);
884 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
885 if (Index >= 0 && Index < 32)
886 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
887 else if (Index < 0 && Index > -32)
888 Opcodes[1] = RISCV::VSLIDEUP_VI;
889 return LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
893 if (!LT.second.isVector())
899 if (SrcTy->getElementType()->isIntegerTy(1)) {
911 MVT ContainerVT = LT.second;
912 if (LT.second.isFixedLengthVector())
913 ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
915 if (ContainerVT.
bitsLE(M1VT)) {
925 if (LT.second.isFixedLengthVector())
927 LenCost =
isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
928 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
929 if (LT.second.isFixedLengthVector() &&
930 isInt<5>(LT.second.getVectorNumElements() - 1))
931 Opcodes[1] = RISCV::VRSUB_VI;
933 getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
934 return LT.first * (LenCost + GatherCost);
941 unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
943 getRISCVInstructionCost(M1Opcodes, M1VT,
CostKind) + 3;
947 getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT,
CostKind) * Ratio;
949 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second,
CostKind);
950 return FixedCost + LT.first * (GatherCost + SlideCost);
                                     Ty, DemandedElts, Insert, Extract, CostKind);

  if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
    if (Ty->getScalarSizeInBits() == 1) {
      assert(LT.second.isFixedLengthVector());
      MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
          getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
  switch (MICA.getID()) {
  case Intrinsic::vp_load_ff: {
    EVT DataTypeVT = TLI->getValueType(DL, DataTy);
    if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
  case Intrinsic::experimental_vp_strided_load:
  case Intrinsic::experimental_vp_strided_store:
  case Intrinsic::masked_compressstore:
  case Intrinsic::masked_expandload:
  case Intrinsic::vp_scatter:
  case Intrinsic::vp_gather:
  case Intrinsic::masked_scatter:
  case Intrinsic::masked_gather:
  case Intrinsic::vp_load:
  case Intrinsic::vp_store:
  case Intrinsic::masked_load:
  case Intrinsic::masked_store:

  unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? Instruction::Load
                                                           : Instruction::Store;
    bool UseMaskForCond, bool UseMaskForGaps) const {

  if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    if (LT.second.isVector()) {
          VTy->getElementCount().divideCoefficientBy(Factor));
      if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
          TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
        if (ST->hasOptimizedSegmentLoadStore(Factor)) {
          MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
          Cost += Factor * TLI->getLMULCost(SubVecVT);
          return LT.first * Cost;

      CostKind, {TTI::OK_AnyValue, TTI::OP_None});
  unsigned NumLoads = getEstimatedVLFor(VTy);
  return NumLoads * MemOpCost;

  unsigned VF = FVTy->getNumElements() / Factor;
  if (Opcode == Instruction::Load) {
    for (unsigned Index : Indices) {
      Mask.resize(VF * Factor, -1);
      Cost += ShuffleCost;

                                           UseMaskForCond, UseMaskForGaps);

  assert(Opcode == Instruction::Store && "Opcode must be a store");
  return MemCost + ShuffleCost;
  bool IsLoad = MICA.getID() == Intrinsic::masked_gather ||
                MICA.getID() == Intrinsic::vp_gather;
  unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;

  if ((Opcode == Instruction::Load &&
      (Opcode == Instruction::Store &&

                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;

  unsigned Opcode = MICA.getID() == Intrinsic::masked_expandload
                        : Instruction::Store;
  bool IsLegal = (Opcode == Instruction::Store &&
                 (Opcode == Instruction::Load &&

  if (Opcode == Instruction::Store)
    Opcodes.append({RISCV::VCOMPRESS_VM});
    Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
         LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);

  unsigned Opcode = MICA.getID() == Intrinsic::experimental_vp_strided_load
                        : Instruction::Store;

                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
  for (auto *Ty : Tys) {
    if (!Ty->isVectorTy())
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::ctlz, MVT::i8, 19},
    {Intrinsic::ctlz, MVT::i16, 28},
    {Intrinsic::ctlz, MVT::i32, 31},
    {Intrinsic::ctlz, MVT::i64, 35},
    {Intrinsic::cttz, MVT::i8, 16},
    {Intrinsic::cttz, MVT::i16, 23},
    {Intrinsic::cttz, MVT::i32, 24},
    {Intrinsic::cttz, MVT::i64, 25},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
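    // Lookup sketch: the table above is keyed by (intrinsic ID, legalized
    // element type); on a hit the per-entry cost is scaled by LT.first, the
    // legalization split factor (see the table lookup later in
    // getIntrinsicInstrCost).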
  switch (ICA.getID()) {
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::lround:
  case Intrinsic::llround: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      unsigned SrcEltSz = DL.getTypeSizeInBits(SrcTy->getScalarType());
      unsigned DstEltSz = DL.getTypeSizeInBits(RetTy->getScalarType());
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (!ST->hasVInstructionsBF16Minimal())
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (!ST->hasVInstructionsF16Minimal())
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
          Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
      } else if (SrcEltSz > DstEltSz) {
        Ops = {RISCV::VFNCVT_X_F_W};
      } else if (SrcEltSz < DstEltSz) {
        Ops = {RISCV::VFWCVT_X_F_V};
        Ops = {RISCV::VFCVT_X_F_V};

      if (SrcEltSz > DstEltSz)
        return SrcLT.first *
               getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
      return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
    if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
      return LT.first * 8;

  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())

    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);

  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::sadd_sat:
        Op = RISCV::VSADD_VV;
      case Intrinsic::ssub_sat:
        Op = RISCV::VSSUBU_VV;
      case Intrinsic::uadd_sat:
        Op = RISCV::VSADDU_VV;
      case Intrinsic::usub_sat:
        Op = RISCV::VSSUBU_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);

  case Intrinsic::fma:
  case Intrinsic::fmuladd: {
    if (ST->hasVInstructions() && LT.second.isVector())
          getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
  case Intrinsic::fabs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      if (LT.second.getVectorElementType() == MVT::bf16 ||
          (LT.second.getVectorElementType() == MVT::f16 &&
           !ST->hasVInstructionsF16()))
        return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
          getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);

  case Intrinsic::sqrt: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
      MVT ConvType = LT.second;
      MVT FsqrtType = LT.second;
      if (LT.second.getVectorElementType() == MVT::bf16) {
        if (LT.second == MVT::nxv32bf16) {
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
                    RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
      } else if (LT.second.getVectorElementType() == MVT::f16 &&
                 !ST->hasVInstructionsF16()) {
        if (LT.second == MVT::nxv32f16) {
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
                    RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
          ConvType = MVT::nxv16f16;
          FsqrtType = MVT::nxv16f32;
          ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
          FsqrtOp = {RISCV::VFSQRT_V};
          FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
        FsqrtOp = {RISCV::VFSQRT_V};
      return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
                         getRISCVInstructionCost(ConvOp, ConvType, CostKind));

  case Intrinsic::cttz:
  case Intrinsic::ctlz:
  case Intrinsic::ctpop: {
    if (ST->hasStdExtZvbb() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::cttz:
      case Intrinsic::ctlz:
      case Intrinsic::ctpop:
        Op = RISCV::VCPOP_V;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);

  case Intrinsic::abs: {
    if (ST->hasVInstructions() && LT.second.isVector()) {
          getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
  case Intrinsic::fshl:
  case Intrinsic::fshr: {
    if ((ST->hasStdExtZbb() || ST->hasStdExtZbkb()) && RetTy->isIntegerTy() &&
        (RetTy->getIntegerBitWidth() == 32 ||
         RetTy->getIntegerBitWidth() == 64) &&
        RetTy->getIntegerBitWidth() <= ST->getXLen()) {

  case Intrinsic::get_active_lane_mask: {
    if (ST->hasVInstructions()) {
          getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},

  case Intrinsic::stepvector: {
    if (ST->hasVInstructions())
      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
                 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);

  case Intrinsic::experimental_cttz_elts: {
    EVT ArgType = TLI->getValueType(DL, ArgTy, true);
    if (getTLI()->shouldExpandCttzElements(ArgType))

  case Intrinsic::experimental_vp_splat: {
    if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
    return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()

  case Intrinsic::experimental_vp_splice: {

  case Intrinsic::fptoui_sat:
  case Intrinsic::fptosi_sat: {
    bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
    if (!SrcTy->isVectorTy())
    if (!SrcLT.first.isValid() || !DstLT.first.isValid())
  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
        LT.second.isVector()) {
      MVT EltTy = LT.second.getVectorElementType();
              ICA.getID(), EltTy))
        return LT.first * Entry->Cost;

  if (ST->hasVInstructions() && PtrTy->isVectorTy())

  if (ST->enablePExtCodeGen() &&

  if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
      Dst->getScalarSizeInBits() > ST->getELen())

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  if (Src->getScalarSizeInBits() == 1) {
    return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
           DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,

  if (Dst->getScalarSizeInBits() == 1) {
    return SrcLT.first *
           getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},

  if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
      !SrcLT.first.isValid() || !DstLT.first.isValid() ||
          SrcLT.second.getSizeInBits()) ||
          DstLT.second.getSizeInBits()) ||
      SrcLT.first > 1 || DstLT.first > 1)

  assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");

  int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
                (int)Log2_32(SrcLT.second.getScalarSizeInBits());
  if ((PowDiff < 1) || (PowDiff > 3))
  unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
  unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
  return getRISCVInstructionCost(Op, DstLT.second, CostKind);
  case ISD::FP_EXTEND:
    unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
    unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
                       : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
                                                 : RISCV::VFNCVT_F_F_W;
    for (; SrcEltSize != DstEltSize;) {
      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;

    unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((SrcEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
          VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize)
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
      Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
      if ((SrcEltSize / 2) > DstEltSize) {

    unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
    unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
    unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();

    if ((DstEltSize == 16) &&
        (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
      std::pair<InstructionCost, MVT> VecF32LT =
      Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,

    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize) {
      if ((DstEltSize / 2) > SrcEltSize) {
        unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
  const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
  const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();

  if (Ty->getScalarSizeInBits() > ST->getELen())

  if (Ty->getElementType()->isIntegerTy(1)) {
    if (IID == Intrinsic::umax || IID == Intrinsic::smin)

  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
    case Intrinsic::maximum:
      Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
      Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
    case Intrinsic::minimum:
      Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
      Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,

    const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};

      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
                                           std::optional<FastMathFlags> FMF,

  if (Ty->getScalarSizeInBits() > ST->getELen())

  int ISD = TLI->InstructionOpcodeToISD(Opcode);

  Type *ElementTy = Ty->getElementType();
    if (LT.second == MVT::v1i1)
      return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +

      return ((LT.first > 2) ? (LT.first - 2) : 0) *
                 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +

      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;

      return (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +

    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};

    if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
        LT.second.getScalarType() == MVT::bf16)
      for (unsigned i = 0; i < LT.first.getValue(); i++)
      return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};

      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,

  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)

  if (IsUnsigned && Opcode == Instruction::Add &&
      LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
        getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);

  return (LT.first - 1) +

  assert(OpInfo.isConstant() && "non constant operand?");
  if (OpInfo.isUniform())
  return getConstantPoolLoadCost(Ty, CostKind);
  EVT VT = TLI->getValueType(DL, Src, true);
  if (VT == MVT::Other)

  if (Opcode == Instruction::Store && OpInfo.isConstant())

  if (Src->isVectorTy() && LT.second.isVector() &&
                          LT.second.getSizeInBits()))

  if (ST->hasVInstructions() && LT.second.isVector() &&
    BaseCost *= TLI->getLMULCost(LT.second);
  return Cost + BaseCost;
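  // In the compare/select costing below, i1-typed vector selects are modeled
  // as a vmandn/vmand/vmor mask sequence, wider vector selects as a
  // VMERGE_VVM, integer compares as VMSLT_VV, and FP compares as one or more
  // VMFLT_VV-style mask ops (possibly combined with VMOR_MM / VMNAND_MM).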
                                 Op1Info, Op2Info, I);
                                 Op1Info, Op2Info, I);

  if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
                                 Op1Info, Op2Info, I);

  auto GetConstantMatCost =
    if (OpInfo.isUniform())
    return getConstantPoolLoadCost(ValTy, CostKind);

    ConstantMatCost += GetConstantMatCost(Op1Info);
    ConstantMatCost += GetConstantMatCost(Op2Info);

  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
    if (ValTy->getScalarSizeInBits() == 1) {
      return ConstantMatCost +
             getRISCVInstructionCost(
                 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,

    if (ValTy->getScalarSizeInBits() == 1) {
      MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
      return ConstantMatCost +
                 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},

    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(
                          {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},

  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
    return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,

  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);

    if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
        (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
        (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
                                 Op1Info, Op2Info, I);

      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},

      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},

      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);

      ValTy->isIntegerTy() && !I->user_empty()) {
      return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
             U->getType()->isIntegerTy() &&
             !isa<ConstantData>(U->getOperand(1)) &&
             !isa<ConstantData>(U->getOperand(2));

                                 Op1Info, Op2Info, I);
  return Opcode == Instruction::PHI ? 0 : 1;

                                                 const Value *Op1) const {
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)

  if (!LT.second.isVector()) {
    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();
    auto Align = DL.getPrefTypeAlign(ElemTy);
    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;

  if (LT.second.isScalableVector() && !LT.first.isValid())

  if (Opcode == Instruction::ExtractElement) {
    return ExtendCost + ExtractCost;
    return ExtendCost + InsertCost + TruncCost;

  unsigned BaseCost = 1;
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;

  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;

    if (auto VLEN = ST->getRealVLen()) {
      unsigned EltSize = LT.second.getScalarSizeInBits();
      unsigned M1Max = *VLEN / EltSize;
      Index = Index % M1Max;
  else if (ST->hasVendorXRivosVisni() && isUInt<5>(Index) &&
  else if (Opcode == Instruction::InsertElement)

      ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                          LT.second.isScalableVector()))) {
    Align VecAlign = DL.getPrefTypeAlign(Val);
    Align SclAlign = DL.getPrefTypeAlign(ScalarType);
    if (Opcode == Instruction::ExtractElement)

    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
  return BaseCost + SlideCost;
                                                          unsigned Index) const {
  assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
                       EC.getKnownMinValue() - 1 - Index, nullptr,
  if (!LT.second.isVector())

  unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);

  if ((LT.second.getVectorElementType() == MVT::f16 ||
       LT.second.getVectorElementType() == MVT::bf16) &&
      TLI->getOperationAction(ISDOpcode, LT.second) ==
    MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
    CastCost += LT.first * Args.size() *
    LT.second = PromotedVT;

  auto getConstantMatCost =
    return getConstantPoolLoadCost(Ty, CostKind);

    ConstantMatCost += getConstantMatCost(0, Op1Info);
    ConstantMatCost += getConstantMatCost(1, Op2Info);

  switch (ISDOpcode) {
    Op = RISCV::VADD_VV;
    Op = RISCV::VSLL_VV;
    Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
    Op = RISCV::VMUL_VV;
    Op = RISCV::VDIV_VV;
    Op = RISCV::VREM_VV;
    Op = RISCV::VFADD_VV;
    Op = RISCV::VFMUL_VV;
    Op = RISCV::VFDIV_VV;
    Op = RISCV::VFSGNJN_VV;

  return CastCost + ConstantMatCost +

  if (Ty->isFPOrFPVectorTy())
  return CastCost + ConstantMatCost + LT.first * InstrCost;

  if (Info.isSameBase() && V != Base) {
    if (GEP->hasAllConstantIndices())

      unsigned Stride = DL.getTypeStoreSize(AccessTy);
      if (Info.isUnitStride() &&
              GEP->getType()->getPointerAddressSpace()))
          {TTI::OK_AnyValue, TTI::OP_None},
          {TTI::OK_AnyValue, TTI::OP_None}, {});
  if (ST->enableDefaultUnroll())

  if (L->getHeader()->getParent()->hasOptSize())

  L->getExitingBlocks(ExitingBlocks);
             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");

  if (ExitingBlocks.size() > 2)

  if (L->getNumBlocks() > 4)

  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (IsVectorized && I.getType()->isVectorTy())
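  // The intrinsic switch below recognizes RVV unit-stride, strided, and
  // indexed loads/stores (plus their segment and masked variants) and records
  // the pointer operand, element type, alignment, mask, and EVL in
  // Info.InterestingOperands; the pointer operand index is derived from the
  // VL operand position, adjusted for the mask and, for strided/indexed
  // forms, the extra stride or index operand.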
  bool HasMask = false;
                          bool IsWrite) -> int64_t {
    if (auto *TarExtTy =
      return TarExtTy->getIntParameter(0);

  case Intrinsic::riscv_vle_mask:
  case Intrinsic::riscv_vse_mask:
  case Intrinsic::riscv_vlseg2_mask:
  case Intrinsic::riscv_vlseg3_mask:
  case Intrinsic::riscv_vlseg4_mask:
  case Intrinsic::riscv_vlseg5_mask:
  case Intrinsic::riscv_vlseg6_mask:
  case Intrinsic::riscv_vlseg7_mask:
  case Intrinsic::riscv_vlseg8_mask:
  case Intrinsic::riscv_vsseg2_mask:
  case Intrinsic::riscv_vsseg3_mask:
  case Intrinsic::riscv_vsseg4_mask:
  case Intrinsic::riscv_vsseg5_mask:
  case Intrinsic::riscv_vsseg6_mask:
  case Intrinsic::riscv_vsseg7_mask:
  case Intrinsic::riscv_vsseg8_mask:
  case Intrinsic::riscv_vle:
  case Intrinsic::riscv_vse:
  case Intrinsic::riscv_vlseg2:
  case Intrinsic::riscv_vlseg3:
  case Intrinsic::riscv_vlseg4:
  case Intrinsic::riscv_vlseg5:
  case Intrinsic::riscv_vlseg6:
  case Intrinsic::riscv_vlseg7:
  case Intrinsic::riscv_vlseg8:
  case Intrinsic::riscv_vsseg2:
  case Intrinsic::riscv_vsseg3:
  case Intrinsic::riscv_vsseg4:
  case Intrinsic::riscv_vsseg5:
  case Intrinsic::riscv_vsseg6:
  case Intrinsic::riscv_vsseg7:
  case Intrinsic::riscv_vsseg8: {
      Ty = TarExtTy->getTypeParameter(0U);

    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 1 - HasMask;

    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Alignment, Mask, EVL);
  case Intrinsic::riscv_vlse_mask:
  case Intrinsic::riscv_vsse_mask:
  case Intrinsic::riscv_vlsseg2_mask:
  case Intrinsic::riscv_vlsseg3_mask:
  case Intrinsic::riscv_vlsseg4_mask:
  case Intrinsic::riscv_vlsseg5_mask:
  case Intrinsic::riscv_vlsseg6_mask:
  case Intrinsic::riscv_vlsseg7_mask:
  case Intrinsic::riscv_vlsseg8_mask:
  case Intrinsic::riscv_vssseg2_mask:
  case Intrinsic::riscv_vssseg3_mask:
  case Intrinsic::riscv_vssseg4_mask:
  case Intrinsic::riscv_vssseg5_mask:
  case Intrinsic::riscv_vssseg6_mask:
  case Intrinsic::riscv_vssseg7_mask:
  case Intrinsic::riscv_vssseg8_mask:
  case Intrinsic::riscv_vlse:
  case Intrinsic::riscv_vsse:
  case Intrinsic::riscv_vlsseg2:
  case Intrinsic::riscv_vlsseg3:
  case Intrinsic::riscv_vlsseg4:
  case Intrinsic::riscv_vlsseg5:
  case Intrinsic::riscv_vlsseg6:
  case Intrinsic::riscv_vlsseg7:
  case Intrinsic::riscv_vlsseg8:
  case Intrinsic::riscv_vssseg2:
  case Intrinsic::riscv_vssseg3:
  case Intrinsic::riscv_vssseg4:
  case Intrinsic::riscv_vssseg5:
  case Intrinsic::riscv_vssseg6:
  case Intrinsic::riscv_vssseg7:
  case Intrinsic::riscv_vssseg8: {
      Ty = TarExtTy->getTypeParameter(0U);

    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 2 - HasMask;

      Alignment = Align(1);

    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Alignment, Mask, EVL, Stride);
  case Intrinsic::riscv_vloxei_mask:
  case Intrinsic::riscv_vluxei_mask:
  case Intrinsic::riscv_vsoxei_mask:
  case Intrinsic::riscv_vsuxei_mask:
  case Intrinsic::riscv_vloxseg2_mask:
  case Intrinsic::riscv_vloxseg3_mask:
  case Intrinsic::riscv_vloxseg4_mask:
  case Intrinsic::riscv_vloxseg5_mask:
  case Intrinsic::riscv_vloxseg6_mask:
  case Intrinsic::riscv_vloxseg7_mask:
  case Intrinsic::riscv_vloxseg8_mask:
  case Intrinsic::riscv_vluxseg2_mask:
  case Intrinsic::riscv_vluxseg3_mask:
  case Intrinsic::riscv_vluxseg4_mask:
  case Intrinsic::riscv_vluxseg5_mask:
  case Intrinsic::riscv_vluxseg6_mask:
  case Intrinsic::riscv_vluxseg7_mask:
  case Intrinsic::riscv_vluxseg8_mask:
  case Intrinsic::riscv_vsoxseg2_mask:
  case Intrinsic::riscv_vsoxseg3_mask:
  case Intrinsic::riscv_vsoxseg4_mask:
  case Intrinsic::riscv_vsoxseg5_mask:
  case Intrinsic::riscv_vsoxseg6_mask:
  case Intrinsic::riscv_vsoxseg7_mask:
  case Intrinsic::riscv_vsoxseg8_mask:
  case Intrinsic::riscv_vsuxseg2_mask:
  case Intrinsic::riscv_vsuxseg3_mask:
  case Intrinsic::riscv_vsuxseg4_mask:
  case Intrinsic::riscv_vsuxseg5_mask:
  case Intrinsic::riscv_vsuxseg6_mask:
  case Intrinsic::riscv_vsuxseg7_mask:
  case Intrinsic::riscv_vsuxseg8_mask:
  case Intrinsic::riscv_vloxei:
  case Intrinsic::riscv_vluxei:
  case Intrinsic::riscv_vsoxei:
  case Intrinsic::riscv_vsuxei:
  case Intrinsic::riscv_vloxseg2:
  case Intrinsic::riscv_vloxseg3:
  case Intrinsic::riscv_vloxseg4:
  case Intrinsic::riscv_vloxseg5:
  case Intrinsic::riscv_vloxseg6:
  case Intrinsic::riscv_vloxseg7:
  case Intrinsic::riscv_vloxseg8:
  case Intrinsic::riscv_vluxseg2:
  case Intrinsic::riscv_vluxseg3:
  case Intrinsic::riscv_vluxseg4:
  case Intrinsic::riscv_vluxseg5:
  case Intrinsic::riscv_vluxseg6:
  case Intrinsic::riscv_vluxseg7:
  case Intrinsic::riscv_vluxseg8:
  case Intrinsic::riscv_vsoxseg2:
  case Intrinsic::riscv_vsoxseg3:
  case Intrinsic::riscv_vsoxseg4:
  case Intrinsic::riscv_vsoxseg5:
  case Intrinsic::riscv_vsoxseg6:
  case Intrinsic::riscv_vsoxseg7:
  case Intrinsic::riscv_vsoxseg8:
  case Intrinsic::riscv_vsuxseg2:
  case Intrinsic::riscv_vsuxseg3:
  case Intrinsic::riscv_vsuxseg4:
  case Intrinsic::riscv_vsuxseg5:
  case Intrinsic::riscv_vsuxseg6:
  case Intrinsic::riscv_vsuxseg7:
  case Intrinsic::riscv_vsuxseg8: {
      Ty = TarExtTy->getTypeParameter(0U);

    const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
    unsigned VLIndex = RVVIInfo->VLOperand;
    unsigned PtrOperandNo = VLIndex - 2 - HasMask;

    unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
    unsigned ElemSize = Ty->getScalarSizeInBits();
    Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                          Align(1), Mask, EVL,
  if (Ty->isVectorTy()) {
    if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||

  if (Size.isScalable() && ST->hasVInstructions())
  if (ST->useRVVForFixedLengthVectors())

  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);

  return ST->enableUnalignedVectorMem();

  if (ST->hasVendorXCVmem() && !ST->is64Bit())

                                                   Align Alignment) const {
  if (!VTy || VTy->isScalableTy())
  if (VTy->getElementType()->isIntegerTy(8))
    if (VTy->getElementCount().getFixedValue() > 256)
  return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
         ST->getMaxLMULForFixedLengthVectors();
                                                  Align Alignment) const {
  if (!VTy || VTy->isScalableTy())

    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)

  for (const User *U : I.users()) {
      Considerable = true;
      if (GEPInst->getNumOperands() > 2) {
        AllowPromotionWithoutCommonHeader = true;
  return Considerable;
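  // The opcode and intrinsic lists below enumerate operations whose RVV
  // lowering can take a scalar splat operand directly (.vx/.vf/.vi forms), so
  // sinking a splat into the loop body is profitable; for the non-commutative
  // cases only operand 1 can be the splat.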
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::Select:
    return Operand == 1;

  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())

  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
  case Intrinsic::fmuladd:
  case Intrinsic::vp_fmuladd:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
  case Intrinsic::ssub_sat:
  case Intrinsic::vp_ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::vp_usub_sat:
  case Intrinsic::vp_select:
    return Operand == 1;
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  case Intrinsic::smin:
  case Intrinsic::vp_smin:
  case Intrinsic::umin:
  case Intrinsic::vp_umin:
  case Intrinsic::smax:
  case Intrinsic::vp_smax:
  case Intrinsic::umax:
  case Intrinsic::vp_umax:
  case Intrinsic::sadd_sat:
  case Intrinsic::vp_sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::vp_uadd_sat:
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
  if (I->isBitwiseLogicOp()) {
    if (!I->getType()->isVectorTy()) {
      if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
        for (auto &Op : I->operands()) {
    } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
      for (auto &Op : I->operands()) {
          Ops.push_back(&Not);
          Ops.push_back(&InsertElt);

  if (!I->getType()->isVectorTy() || !ST->hasVInstructions())

  if (!ST->sinkSplatOperands())

    for (Use &U : Op->uses()) {

      Ops.push_back(&Op->getOperandUse(0));
      Use *InsertEltUse = &Op->getOperandUse(0);
      Ops.push_back(&InsertElt->getOperandUse(1));
      Ops.push_back(InsertEltUse);
  if (!ST->enableUnalignedScalarMem())
  if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)

  Options.AllowOverlappingLoads = true;
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  if (ST->is64Bit()) {
    Options.LoadSizes = {8, 4, 2, 1};
    Options.AllowedTailExpansions = {3, 5, 6};
    Options.LoadSizes = {4, 2, 1};
    Options.AllowedTailExpansions = {3};

  if (IsZeroCmp && ST->hasVInstructions()) {
    unsigned VLenB = ST->getRealMinVLen() / 8;
    unsigned MinSize = ST->getXLen() / 8 + 1;
    unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
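    // Worked example (assuming VLEN=128 i.e. VLenB=16, RV64, max LMUL=8): the
    // vectorized zero-compare expansion covers sizes from
    // MinSize = XLen/8 + 1 = 9 bytes up to MaxSize = 16 * 8 = 128 bytes.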
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Get memory intrinsic cost based on arguments.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
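The overrides above are normally reached through the TargetTransformInfo facade rather than called on RISCVTTIImpl directly. A hedged sketch of a client-side query; the function name queryCosts is hypothetical, and the TargetTransformInfo reference is assumed to come from TargetIRAnalysis or getAnalysis in a real pass:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include <optional>
using namespace llvm;

static void queryCosts(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  auto *V8I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 8);

  // Reciprocal-throughput cost of a plain vector add vs. an add reduction.
  InstructionCost AddCost = TTI.getArithmeticInstrCost(
      Instruction::Add, V8I32, TargetTransformInfo::TCK_RecipThroughput);
  InstructionCost RedCost = TTI.getArithmeticReductionCost(
      Instruction::Add, V8I32, /*FMF=*/std::nullopt,
      TargetTransformInfo::TCK_RecipThroughput);

  if (AddCost.isValid() && RedCost.isValid() &&
      RedCost.getValue() > 4 * AddCost.getValue()) {
    // e.g. steer a vectorization decision based on the relative costs
  }
}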
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
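These helpers price RVV operations in units of LMUL, and computeVLMAX reflects the architectural relationship VLMAX = LMUL * VLEN / SEW from the RVV specification. An illustrative arithmetic sketch only; the helper name vlmaxSketch and its parameter convention are assumptions, not the in-tree signature:

#include <cassert>

// Illustrative only: RVV defines VLMAX = LMUL * VLEN / SEW.
// With VLEN = 512, SEW = 32 and LMUL = 2: VLMAX = 2 * 512 / 32 = 32 elements.
static unsigned vlmaxSketch(unsigned VLenBits, unsigned SewBits, unsigned LMul) {
  assert(SewBits != 0 && VLenBits % SewBits == 0 && "VLEN must be a multiple of SEW");
  return LMul * VLenBits / SewBits;
}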
This class represents an analyzed expression in the program.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
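A short illustrative sketch of the Type queries listed above; the function name typeExample is hypothetical:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static void typeExample(LLVMContext &Ctx) {
  Type *I8 = Type::getIntNTy(Ctx, 8);
  auto *V16I8 = FixedVectorType::get(I8, 16);                         // <16 x i8>
  auto *NxV4F32 = ScalableVectorType::get(Type::getFloatTy(Ctx), 4);  // <vscale x 4 x float>

  (void)V16I8->isVectorTy();                    // true
  (void)V16I8->getScalarSizeInBits();           // 8
  Type *V16I32 = V16I8->getWithNewBitWidth(32); // <16 x i32>: same lane count, wider lanes
  Type *NxV4I32 =
      NxV4F32->getWithNewType(Type::getIntNTy(Ctx, 32)); // keep element count, swap element type
  (void)V16I32;
  (void)NxV4I32;
}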
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
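ElementCount and TypeSize are polynomial quantities, a known minimum optionally scaled by vscale, which is why they expose isKnownMultipleOf and divideCoefficientBy rather than plain division. A minimal sketch; the function name is hypothetical:

#include "llvm/Support/TypeSize.h"
using namespace llvm;

static void elementCountExample() {
  ElementCount Fixed8 = ElementCount::getFixed(8);    // 8 elements
  ElementCount Scal4 = ElementCount::getScalable(4);  // vscale x 4 elements

  (void)Scal4.getKnownMinValue();                     // 4
  (void)Scal4.isKnownMultipleOf(2);                   // true: vscale x 4 is always even
  ElementCount Half = Scal4.divideCoefficientBy(2);   // vscale x 2
  (void)ElementCount::isKnownLT(Half, Scal4);         // true

  TypeSize Bits = TypeSize::getScalable(128);         // vscale x 128 bits
  (void)Bits.getKnownMinValue();                      // 128
  (void)Fixed8.getFixedValue();                       // 8
}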
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
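The m_* combinators above come from two matcher families: the GlobalISel MIPatternMatch matchers (m_ZeroInt, the G_XOR-based m_Not) and the IR-level llvm::PatternMatch matchers. A hedged sketch of the IR-level ones; both helper names are hypothetical:

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognises "insertelement %vec, %elt, %idx" and binds the three operands.
static bool matchInsertElt(Value *V, Value *&Vec, Value *&Elt, Value *&Idx) {
  return match(V, m_InsertElt(m_Value(Vec), m_Value(Elt), m_Value(Idx)));
}

// Recognises a call to llvm.fabs regardless of its operand.
static bool isFAbsCall(Value *V) {
  return match(V, m_Intrinsic<Intrinsic::fabs>(m_Value()));
}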
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
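A hedged sketch of the two mask builders; the masks in the comments follow their documented semantics (createInterleaveMask interleaves NumVecs inputs of VF lanes each, createStrideMask picks every Stride-th lane starting at Start), and the function name is hypothetical:

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void shuffleMaskExample() {
  // Interleave two 4-lane vectors: expected mask <0, 4, 1, 5, 2, 6, 3, 7>.
  SmallVector<int, 16> Interleave = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);

  // De-interleave lane 0 with stride 2 over 4 results: expected mask <0, 2, 4, 6>.
  SmallVector<int, 16> Stride = createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);

  (void)Interleave;
  (void)Stride;
}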
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
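Several of the integer utilities above recur throughout the cost model. A small sketch with the values their documented semantics produce; the function name is hypothetical:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static void mathExtrasExample() {
  assert(Log2_32(32) == 5);                     // floor log2
  assert(Log2_32_Ceil(33) == 6);                // ceil log2
  assert(isPowerOf2_32(64));
  assert(countr_zero(0x8u) == 3);               // trailing zero bits
  assert(isShiftedMask_64(0x0ff0));             // one contiguous run of ones
  assert(isInt<12>(-2048) && !isInt<12>(2048)); // signed 12-bit range
  assert(isUInt<5>(31) && !isUInt<5>(32));      // unsigned 5-bit range
  assert(divideCeil(10, 4) == 3);
  assert(SignExtend64<12>(0xFFF) == -1);        // sign-extend the low 12 bits
  assert(bit_floor(10u) == 8u);                 // largest power of two <= value
}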
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
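Align always holds a valid non-zero power of two, while MaybeAlign may be unset; valueOrOne() is the usual conservative bridge between the two. A minimal sketch; the function name is hypothetical:

#include "llvm/Support/Alignment.h"
using namespace llvm;

static void alignExample() {
  Align A(16);
  (void)A.value();                             // 16

  MaybeAlign Unknown;                          // no alignment information
  Align Conservative = Unknown.valueOrOne();   // falls back to 1-byte alignment
  (void)Conservative;

  MaybeAlign Known(Align(8));
  (void)Known.valueOrOne();                    // 8
}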
Information about a load/store intrinsic defined by the target.