24 #define DEBUG_TYPE "riscvtti"
27 "riscv-v-register-bit-width-lmul",
29 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
30 "by autovectorized code. Fractional LMULs are not supported."),
36 "Overrides result used for getMaximumVF query which is used "
37 "exclusively by SLP vectorizer."),
46 size_t NumInstr = OpCodes.size();
51 return LMULCost * NumInstr;
53 for (auto Op : OpCodes) {
55 case RISCV::VRGATHER_VI:
58 case RISCV::VRGATHER_VV:
61 case RISCV::VSLIDEUP_VI:
62 case RISCV::VSLIDEDOWN_VI:
65 case RISCV::VSLIDEUP_VX:
66 case RISCV::VSLIDEDOWN_VX:
69 case RISCV::VREDMAX_VS:
70 case RISCV::VREDMIN_VS:
71 case RISCV::VREDMAXU_VS:
72 case RISCV::VREDMINU_VS:
73 case RISCV::VREDSUM_VS:
74 case RISCV::VREDAND_VS:
75 case RISCV::VREDOR_VS:
76 case RISCV::VREDXOR_VS:
77 case RISCV::VFREDMAX_VS:
78 case RISCV::VFREDMIN_VS:
79 case RISCV::VFREDUSUM_VS: {
86 case RISCV::VFREDOSUM_VS: {
100 case RISCV::VMANDN_MM:
101 case RISCV::VMNAND_MM:
103 case RISCV::VFIRST_M:
119 "getIntImmCost can only estimate cost of materialising integers");
140 auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
141 if (!BO || !BO->hasOneUse())
144 if (BO->getOpcode() != Instruction::Shl)
147 if (!isa<ConstantInt>(BO->getOperand(1)))
150 unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
155 if (ShAmt == Trailing)
167 "getIntImmCost can only estimate cost of materialising integers");
175 bool Takes12BitImm = false;
176 unsigned ImmArgIdx = ~0U;
179 case Instruction::GetElementPtr:
184 case Instruction::Store: {
189 if (Idx == 1 || !Inst)
194 if (!getTLI()->allowsMemoryAccessForAlignment(
196 ST->getPointerAddressSpace(), ST->getAlign()))
202 case Instruction::Load:
205 case Instruction::And:
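// Zero-extension style masks are free when a single instruction covers them:
// 0xffff via zext.h (Zbb), 0xffffffff via zext.w/add.uw (Zba), and
// clear-single-bit masks via bclri (Zbs). Anything else falls through to the
// generic 12-bit-immediate handling.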
207 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
210 if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
213 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
215 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
218 Takes12BitImm = true;
220 case Instruction::Add:
221 Takes12BitImm = true;
223 case Instruction::Or:
224 case Instruction::Xor:
226 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
228 Takes12BitImm = true;
230 case Instruction::Mul:
232 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
235 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
238 Takes12BitImm = true;
240 case Instruction::Sub:
241 case Instruction::Shl:
242 case Instruction::LShr:
243 case Instruction::AShr:
244 Takes12BitImm = true;
255 if (Imm.getSignificantBits() <= 64 &&
284 return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->is64Bit())
293 switch (II->getIntrinsicID()) {
297 case Intrinsic::vector_reduce_mul:
298 case Intrinsic::vector_reduce_fmul:
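// RVV has no multiply reduction instruction, so integer and FP multiply
// reductions are left to the generic expansion.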
348 unsigned Size = Mask.size();
351 for (unsigned I = 0; I != Size; ++I) {
352 if (static_cast<unsigned>(Mask[I]) == I)
358 for (unsigned J = I + 1; J != Size; ++J)
360 if (static_cast<unsigned>(Mask[J]) != J % I)
376 return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
392 if (isa<FixedVectorType>(Tp)) {
397 if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
398 MVT EltTp = LT.second.getVectorElementType();
411 if (Mask[0] == 0 || Mask[0] == 1) {
415 if (equal(DeinterleaveMask, Mask))
416 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
421 if (LT.second.getScalarSizeInBits() != 1 &&
424 unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
426 for (unsigned I = 0; I != NumSlides; ++I) {
427 unsigned InsertIndex = SubVectorSize * (1 << I);
432 std::pair<InstructionCost, MVT> DestLT =
446 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
447 (LT.second.getScalarSizeInBits() != 8 ||
448 LT.second.getVectorNumElements() <= 256)) {
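// Single-source permutes map onto one vrgather.vv (plus building the index
// vector); the element-count limit above exists because e8 indices can only
// address 256 lanes.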
452 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
461 if (LT.second.isFixedLengthVector() && LT.first == 1 &&
462 (LT.second.getScalarSizeInBits() != 8 ||
463 LT.second.getVectorNumElements() <= 256)) {
470 return 2 * IndexCost +
471 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
481 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
482 LT.second.isFixedLengthVector() &&
483 LT.second.getVectorElementType().getSizeInBits() ==
485 LT.second.getVectorNumElements() <
486 cast<FixedVectorType>(Tp)->getNumElements() &&
488 cast<FixedVectorType>(Tp)->getNumElements()) ==
489 static_cast<unsigned>(*LT.first.getValue())) {
490 unsigned NumRegs = *LT.first.getValue();
491 unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
496 for (unsigned I = 0, NumSrcRegs = divideCeil(Mask.size(), SubVF);
497 I < NumSrcRegs; ++I) {
498 bool IsSingleVector = true;
501 Mask.slice(I * SubVF,
502 I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF),
503 SubMask.begin(), [&](int I) -> int {
504 if (I == PoisonMaskElem)
505 return PoisonMaskElem;
506 bool SingleSubVector = I / VF == 0;
507 IsSingleVector &= SingleSubVector;
508 return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF;
512 static_cast<unsigned>(P.value()) == P.index();
517 SubVecTy, SubMask, CostKind, 0, nullptr);
545 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
548 if (MinVLen == MaxVLen &&
549 SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
550 SubLT.second.getSizeInBits() <= MinVLen)
558 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
564 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
576 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
581 Instruction::InsertElement);
582 if (LT.second.getScalarSizeInBits() == 1) {
590 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
603 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
604 RISCV::VMV_X_S, RISCV::VMV_V_X,
613 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
619 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
625 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
626 if (Index >= 0 && Index < 32)
627 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
628 else if (Index < 0 && Index > -32)
629 Opcodes[1] = RISCV::VSLIDEUP_VI;
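// Splice-style shuffles are modeled as a slidedown of one source followed by
// a slideup of the other; the immediate (.vi) forms apply when the offset
// fits in a 5-bit unsigned immediate.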
630 return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
648 if (LT.second.isFixedLengthVector())
650 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
651 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
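// Reverse shuffle: vid.v builds 0..VL-1, vrsub turns that into VL-1..0, and
// vrgather.vv applies the reversed index. LenCost covers materializing VL-1
// when it does not fit the 5-bit immediate form.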
652 if (LT.second.isFixedLengthVector() &&
653 isInt<5>(LT.second.getVectorNumElements() - 1))
654 Opcodes[1] = RISCV::VRSUB_VI;
656 getRISCVInstructionCost(Opcodes, LT.second, CostKind);
659 return LT.first * (LenCost + GatherCost + ExtendCost);
674 if (isa<ScalableVectorType>(Ty))
682 Ty, DemandedElts, Insert, Extract, CostKind);
684 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
695 assert(LT.second.isFixedLengthVector());
699 cast<FixedVectorType>(Ty)->getNumElements() *
700 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
723 bool UseMaskForCond, bool UseMaskForGaps) {
728 if (!UseMaskForCond && !UseMaskForGaps &&
729 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
730 auto *VTy = cast<VectorType>(VecTy);
733 if (LT.second.isVector()) {
736 VTy->getElementCount().divideCoefficientBy(Factor));
737 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
748 return LT.first * Cost;
755 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
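// Fallback for interleave factors without a usable segment load/store: treat
// the access as effectively scalarized, one memory op per (estimated) lane.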
756 unsigned NumLoads = getEstimatedVLFor(VTy);
757 return NumLoads * MemOpCost;
764 if (isa<ScalableVectorType>(VecTy))
767 auto *FVTy = cast<FixedVectorType>(VecTy);
770 unsigned VF = FVTy->getNumElements() / Factor;
777 if (Opcode == Instruction::Load) {
779 for (unsigned Index : Indices) {
804 UseMaskForCond, UseMaskForGaps);
806 assert(Opcode == Instruction::Store && "Opcode must be a store");
813 return MemCost + ShuffleCost;
817 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
823 if ((Opcode == Instruction::Load &&
825 (Opcode == Instruction::Store &&
833 auto &VTy = *cast<VectorType>(DataTy);
836 {TTI::OK_AnyValue, TTI::OP_None}, I);
837 unsigned NumLoads = getEstimatedVLFor(&VTy);
838 return NumLoads * MemOpCost;
842 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
844 if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
846 (Opcode != Instruction::Load && Opcode != Instruction::Store))
856 auto &VTy = *cast<VectorType>(DataTy);
859 {TTI::OK_AnyValue, TTI::OP_None}, I);
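// Like gather/scatter above, a strided access is costed as an estimated VL
// worth of independent element accesses.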
860 unsigned NumLoads = getEstimatedVLFor(&VTy);
861 return NumLoads * MemOpCost;
871 for (auto *Ty : Tys) {
872 if (!Ty->isVectorTy())
886 {Intrinsic::floor, MVT::f32, 9},
887 {Intrinsic::floor, MVT::f64, 9},
888 {Intrinsic::ceil, MVT::f32, 9},
889 {Intrinsic::ceil, MVT::f64, 9},
890 {Intrinsic::trunc, MVT::f32, 7},
891 {Intrinsic::trunc, MVT::f64, 7},
892 {Intrinsic::round, MVT::f32, 9},
893 {Intrinsic::round, MVT::f64, 9},
894 {Intrinsic::roundeven, MVT::f32, 9},
895 {Intrinsic::roundeven, MVT::f64, 9},
896 {Intrinsic::rint, MVT::f32, 7},
897 {Intrinsic::rint, MVT::f64, 7},
898 {Intrinsic::lrint, MVT::i32, 1},
899 {Intrinsic::lrint, MVT::i64, 1},
900 {Intrinsic::llrint, MVT::i64, 1},
901 {Intrinsic::nearbyint, MVT::f32, 9},
902 {Intrinsic::nearbyint, MVT::f64, 9},
903 {Intrinsic::bswap, MVT::i16, 3},
904 {Intrinsic::bswap, MVT::i32, 12},
905 {Intrinsic::bswap, MVT::i64, 31},
906 {Intrinsic::vp_bswap, MVT::i16, 3},
907 {Intrinsic::vp_bswap, MVT::i32, 12},
908 {Intrinsic::vp_bswap, MVT::i64, 31},
909 {Intrinsic::vp_fshl, MVT::i8, 7},
910 {Intrinsic::vp_fshl, MVT::i16, 7},
911 {Intrinsic::vp_fshl, MVT::i32, 7},
912 {Intrinsic::vp_fshl, MVT::i64, 7},
913 {Intrinsic::vp_fshr, MVT::i8, 7},
914 {Intrinsic::vp_fshr, MVT::i16, 7},
915 {Intrinsic::vp_fshr, MVT::i32, 7},
916 {Intrinsic::vp_fshr, MVT::i64, 7},
917 {Intrinsic::bitreverse, MVT::i8, 17},
918 {Intrinsic::bitreverse, MVT::i16, 24},
919 {Intrinsic::bitreverse, MVT::i32, 33},
920 {Intrinsic::bitreverse, MVT::i64, 52},
921 {Intrinsic::vp_bitreverse, MVT::i8, 17},
922 {Intrinsic::vp_bitreverse, MVT::i16, 24},
923 {Intrinsic::vp_bitreverse, MVT::i32, 33},
924 {Intrinsic::vp_bitreverse, MVT::i64, 52},
925 {Intrinsic::ctpop, MVT::i8, 12},
926 {Intrinsic::ctpop, MVT::i16, 19},
927 {Intrinsic::ctpop, MVT::i32, 20},
928 {Intrinsic::ctpop, MVT::i64, 21},
929 {Intrinsic::ctlz, MVT::i8, 19},
930 {Intrinsic::ctlz, MVT::i16, 28},
931 {Intrinsic::ctlz, MVT::i32, 31},
932 {Intrinsic::ctlz, MVT::i64, 35},
933 {Intrinsic::cttz, MVT::i8, 16},
934 {Intrinsic::cttz, MVT::i16, 23},
935 {Intrinsic::cttz, MVT::i32, 24},
936 {Intrinsic::cttz, MVT::i64, 25},
937 {Intrinsic::vp_ctpop, MVT::i8, 12},
938 {Intrinsic::vp_ctpop, MVT::i16, 19},
939 {Intrinsic::vp_ctpop, MVT::i32, 20},
940 {Intrinsic::vp_ctpop, MVT::i64, 21},
941 {Intrinsic::vp_ctlz, MVT::i8, 19},
942 {Intrinsic::vp_ctlz, MVT::i16, 28},
943 {Intrinsic::vp_ctlz, MVT::i32, 31},
944 {Intrinsic::vp_ctlz, MVT::i64, 35},
945 {Intrinsic::vp_cttz, MVT::i8, 16},
946 {Intrinsic::vp_cttz, MVT::i16, 23},
947 {Intrinsic::vp_cttz, MVT::i32, 24},
948 {Intrinsic::vp_cttz, MVT::i64, 25},
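// The entries above are rough instruction counts for the expanded vector
// sequences, keyed by element type; intrinsics with a dedicated instruction
// (e.g. Zvbb for bswap/ctpop/ctlz/cttz) are handled before this table is
// consulted.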
953 #define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
954 case Intrinsic::VPID: \
956 #include "llvm/IR/VPIntrinsics.def"
957 #undef HELPER_MAP_VPID_TO_VPSD
966 switch (ICA.getID()) {
967 case Intrinsic::lrint:
968 case Intrinsic::llrint:
970 if (auto *VecTy = dyn_cast<VectorType>(ICA.getArgTypes()[0]);
971 VecTy && VecTy->getElementType()->is16bitFPTy())
974 case Intrinsic::ceil:
975 case Intrinsic::floor:
976 case Intrinsic::trunc:
977 case Intrinsic::rint:
978 case Intrinsic::round:
979 case Intrinsic::roundeven: {
986 case Intrinsic::umin:
987 case Intrinsic::umax:
988 case Intrinsic::smin:
989 case Intrinsic::smax: {
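// Integer min/max are a single instruction either way: one Zbb min/max for
// scalar types, or one vmin/vmax per register group for vectors.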
991 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
996 switch (ICA.getID()) {
997 case Intrinsic::umin:
998 Op = RISCV::VMINU_VV;
1000 case Intrinsic::umax:
1001 Op = RISCV::VMAXU_VV;
1003 case Intrinsic::smin:
1004 Op = RISCV::VMIN_VV;
1006 case Intrinsic::smax:
1007 Op = RISCV::VMAX_VV;
1010 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1014 case Intrinsic::sadd_sat:
1015 case Intrinsic::ssub_sat:
1016 case Intrinsic::uadd_sat:
1017 case Intrinsic::usub_sat: {
1021 switch (ICA.getID()) {
1022 case Intrinsic::sadd_sat:
1023 Op = RISCV::VSADD_VV;
1025 case Intrinsic::ssub_sat:
1026 Op = RISCV::VSSUBU_VV;
1028 case Intrinsic::uadd_sat:
1029 Op = RISCV::VSADDU_VV;
1031 case Intrinsic::usub_sat:
1032 Op = RISCV::VSSUBU_VV;
1035 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1039 case Intrinsic::fabs: {
1047 if (LT.second.getVectorElementType() == MVT::bf16 ||
1048 (LT.second.getVectorElementType() == MVT::f16 &&
1050 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1055 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
1059 case Intrinsic::sqrt: {
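// fsqrt on bf16 (and on f16 when full f16 vector arithmetic is unavailable)
// has no native instruction, so the operand is widened to f32, square-rooted,
// and narrowed back. The nxv32 cases are split in two, presumably because the
// widened type would exceed LMUL=8.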
1064 MVT ConvType = LT.second;
1065 MVT FsqrtType = LT.second;
1068 if (LT.second.getVectorElementType() == MVT::bf16) {
1069 if (LT.second == MVT::nxv32bf16) {
1070 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1071 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1072 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1073 ConvType = MVT::nxv16f16;
1074 FsqrtType = MVT::nxv16f32;
1076 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1077 FsqrtOp = {RISCV::VFSQRT_V};
1080 } else if (LT.second.getVectorElementType() == MVT::f16 &&
1082 if (LT.second == MVT::nxv32f16) {
1083 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1084 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1085 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1086 ConvType = MVT::nxv16f16;
1087 FsqrtType = MVT::nxv16f32;
1089 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1090 FsqrtOp = {RISCV::VFSQRT_V};
1094 FsqrtOp = {RISCV::VFSQRT_V};
1097 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
1098 getRISCVInstructionCost(ConvOp, ConvType, CostKind));
1102 case Intrinsic::cttz:
1103 case Intrinsic::ctlz:
1104 case Intrinsic::ctpop: {
1106 if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) {
1108 switch (ICA.getID()) {
1109 case Intrinsic::cttz:
1112 case Intrinsic::ctlz:
1115 case Intrinsic::ctpop:
1116 Op = RISCV::VCPOP_V;
1119 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1123 case Intrinsic::abs: {
1129 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1134 case Intrinsic::get_active_lane_mask: {
1144 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1150 case Intrinsic::stepvector: {
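// stepvector: one vid.v produces the first register of the sequence; each
// additional register of a multi-register result is offset with a vadd.vx.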
1155 return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
1157 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
1158 return 1 + (LT.first - 1);
1160 case Intrinsic::experimental_cttz_elts: {
1172 cast<ConstantInt>(ICA.getArgs()[1])->isZero())
1180 case Intrinsic::vp_rint: {
1185 return Cost * LT.first;
1188 case Intrinsic::vp_nearbyint: {
1193 return Cost * LT.first;
1196 case Intrinsic::vp_ceil:
1197 case Intrinsic::vp_floor:
1198 case Intrinsic::vp_round:
1199 case Intrinsic::vp_roundeven:
1200 case Intrinsic::vp_roundtozero: {
1207 return Cost * LT.first;
1210 case Intrinsic::vp_fneg: {
1211 std::optional<unsigned> FOp =
1217 case Intrinsic::vp_select: {
1224 case Intrinsic::vp_merge:
1228 case Intrinsic::experimental_vp_splat: {
1233 return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
1242 LT.second.isVector()) {
1243 MVT EltTy = LT.second.getVectorElementType();
1245 ICA.getID(), EltTy))
1246 return LT.first * Entry->Cost;
1258 bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
1266 Dst->getScalarSizeInBits() > ST->getELen())
1270 assert(ISD && "Invalid opcode");
1284 if (Src->getScalarSizeInBits() == 1) {
1289 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
1290 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1296 if (Dst->getScalarSizeInBits() == 1) {
1302 return SrcLT.first *
1303 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1315 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1317 SrcLT.second.getSizeInBits()) ||
1319 DstLT.second.getSizeInBits()))
1323 assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
1325 int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
1326 (int)Log2_32(SrcLT.second.getScalarSizeInBits());
1330 if ((PowDiff < 1) || (PowDiff > 3))
1332 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1333 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1336 return getRISCVInstructionCost(Op, DstLT.second, CostKind);
1342 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1343 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1347 : RISCV::VFNCVT_F_F_W;
1349 for (; SrcEltSize != DstEltSize;) {
1355 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1363 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1365 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1367 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1368 unsigned SrcEltSize = Src->getScalarSizeInBits();
1369 unsigned DstEltSize = Dst->getScalarSizeInBits();
1371 if ((SrcEltSize == 16) &&
1377 cast<VectorType>(Dst)->getElementCount());
1378 std::pair<InstructionCost, MVT> VecF32LT =
1381 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1386 if (DstEltSize == SrcEltSize)
1387 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1388 else if (DstEltSize > SrcEltSize)
1389 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1395 Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
1396 if ((SrcEltSize / 2) > DstEltSize) {
1407 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1408 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1409 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1410 unsigned SrcEltSize = Src->getScalarSizeInBits();
1411 unsigned DstEltSize = Dst->getScalarSizeInBits();
1414 if ((DstEltSize == 16) &&
1420 cast<VectorType>(Dst)->getElementCount());
1421 std::pair<InstructionCost, MVT> VecF32LT =
1424 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1429 if (DstEltSize == SrcEltSize)
1430 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1431 else if (DstEltSize > SrcEltSize) {
1432 if ((DstEltSize / 2) > SrcEltSize) {
1435 cast<VectorType>(Dst)->getElementCount());
1436 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1439 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1441 Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
1448 unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
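// Rough estimate of how many lanes a vector op touches: VLMAX derived from
// the vscale used for tuning for scalable types, or the exact element count
// for fixed-length vectors.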
1449 if (isa<ScalableVectorType>(Ty)) {
1455 return cast<FixedVectorType>(Ty)->getNumElements();
1474 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1480 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
1484 case Intrinsic::maximum:
1486 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1488 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1503 case Intrinsic::minimum:
1505 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1507 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1522 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1531 case Intrinsic::smax:
1532 SplitOp = RISCV::VMAX_VV;
1533 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1535 case Intrinsic::smin:
1536 SplitOp = RISCV::VMIN_VV;
1537 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1539 case Intrinsic::umax:
1540 SplitOp = RISCV::VMAXU_VV;
1541 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1543 case Intrinsic::umin:
1544 SplitOp = RISCV::VMINU_VV;
1545 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1547 case Intrinsic::maxnum:
1548 SplitOp = RISCV::VFMAX_VV;
1549 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1551 case Intrinsic::minnum:
1552 SplitOp = RISCV::VFMIN_VV;
1553 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
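// Operands spanning more than one register are first folded with pairwise
// vector min/max ops (SplitOp); a single reduction plus a scalar move then
// finishes the job, so the split cost is charged separately below.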
1558 (LT.first > 1) ? (LT.first - 1) *
1559 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1561 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1566 std::optional<FastMathFlags> FMF,
1576 assert(ISD && "Invalid opcode");
1588 if (LT.second == MVT::v1i1)
1589 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
1607 return ((LT.first > 2) ? (LT.first - 2) : 0) *
1608 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
1609 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
1610 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
1619 return (LT.first - 1) *
1620 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
1621 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
1629 return (LT.first - 1) *
1630 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
1631 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
1644 SplitOp = RISCV::VADD_VV;
1645 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
1648 SplitOp = RISCV::VOR_VV;
1649 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
1652 SplitOp = RISCV::VXOR_VV;
1653 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
1656 SplitOp = RISCV::VAND_VV;
1657 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
1661 if ((LT.second.getVectorElementType() == MVT::f16 &&
1663 LT.second.getVectorElementType() == MVT::bf16)
1667 for (unsigned i = 0; i < LT.first.getValue(); i++)
1670 return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1672 SplitOp = RISCV::VFADD_VV;
1673 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
1678 (LT.first > 1) ? (LT.first - 1) *
1679 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1681 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1685 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
1696 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
1702 if (IsUnsigned && Opcode == Instruction::Add &&
1703 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
1707 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
1714 return (LT.first - 1) +
1722 if (!isa<VectorType>(Ty))
1734 return getConstantPoolLoadCost(Ty, CostKind);
1746 if (VT == MVT::Other)
1751 if (Opcode == Instruction::Store && OpInfo.isConstant())
1766 if (Src->isVectorTy() && LT.second.isVector() &&
1768 LT.second.getSizeInBits()))
1780 return Cost + BaseCost;
1790 Op1Info, Op2Info, I);
1794 Op1Info, Op2Info, I);
1799 Op1Info, Op2Info, I);
1801 auto GetConstantMatCost =
1803 if (OpInfo.isUniform())
1808 return getConstantPoolLoadCost(ValTy, CostKind);
1813 ConstantMatCost += GetConstantMatCost(Op1Info);
1815 ConstantMatCost += GetConstantMatCost(Op2Info);
1818 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
1824 return ConstantMatCost +
1826 getRISCVInstructionCost(
1827 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1831 return ConstantMatCost +
1832 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
1842 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
1843 return ConstantMatCost +
1845 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
1847 LT.first * getRISCVInstructionCost(
1848 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
1855 return ConstantMatCost +
1856 LT.first * getRISCVInstructionCost(
1857 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
1861 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
1865 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
1870 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
1875 return ConstantMatCost +
1876 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
1886 Op1Info, Op2Info, I);
1895 return ConstantMatCost +
1896 LT.first * getRISCVInstructionCost(
1897 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
1904 return ConstantMatCost +
1906 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
1915 return ConstantMatCost +
1917 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
1930 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
1931 U->getType()->isIntegerTy() &&
1932 !isa<ConstantData>(U->getOperand(1)) &&
1933 !isa<ConstantData>(U->getOperand(2));
1941 Op1Info, Op2Info, I);
1948 return Opcode == Instruction::PHI ? 0 : 1;
1955 unsigned Index, Value *Op0,
1959 if (Opcode != Instruction::ExtractElement &&
1960 Opcode != Instruction::InsertElement)
1967 if (!LT.second.isVector()) {
1968 auto *FixedVecTy = cast<FixedVectorType>(Val);
1976 Type *ElemTy = FixedVecTy->getElementType();
1977 auto NumElems = FixedVecTy->getNumElements();
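// The type does not legalize to a vector, so model element access by going
// through the stack: an extract needs a store per element plus one reload,
// an insert needs a load/store round trip per element plus the final store.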
1983 return Opcode == Instruction::ExtractElement
1984 ? StoreCost * NumElems + LoadCost
1985 : (StoreCost + LoadCost) * NumElems + StoreCost;
1989 if (LT.second.isScalableVector() && !LT.first.isValid())
1996 cast<VectorType>(Val)->getElementCount());
1997 if (Opcode == Instruction::ExtractElement) {
2003 return ExtendCost + ExtractCost;
2013 return ExtendCost + InsertCost + TruncCost;
2019 unsigned BaseCost = 1;
2021 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2026 if (LT.second.isFixedLengthVector()) {
2027 unsigned Width = LT.second.getVectorNumElements();
2028 Index = Index % Width;
2034 unsigned EltSize = LT.second.getScalarSizeInBits();
2035 unsigned M1Max = *VLEN / EltSize;
2036 Index = Index % M1Max;
2042 else if (Opcode == Instruction::InsertElement)
2050 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2051 LT.second.isScalableVector()))) {
2059 if (Opcode == Instruction::ExtractElement)
2095 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2097 return BaseCost + SlideCost;
2123 if (!LT.second.isVector())
2131 if ((LT.second.getVectorElementType() == MVT::f16 ||
2132 LT.second.getVectorElementType() == MVT::bf16) &&
2139 CastCost += LT.first * Args.size() *
2147 LT.second = PromotedVT;
2150 auto getConstantMatCost =
2160 return getConstantPoolLoadCost(Ty, CostKind);
2166 ConstantMatCost += getConstantMatCost(0, Op1Info);
2168 ConstantMatCost += getConstantMatCost(1, Op2Info);
2171 switch (ISDOpcode) {
2174 Op = RISCV::VADD_VV;
2179 Op = RISCV::VSLL_VV;
2189 Op = RISCV::VMUL_VV;
2193 Op = RISCV::VDIV_VV;
2197 Op = RISCV::VREM_VV;
2201 Op = RISCV::VFADD_VV;
2204 Op = RISCV::VFMUL_VV;
2207 Op = RISCV::VFDIV_VV;
2210 Op = RISCV::VFSGNJN_VV;
2215 return CastCost + ConstantMatCost +
2226 return CastCost + ConstantMatCost + LT.first * InstrCost;
2246 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
2249 if (Info.isSameBase() && V != Base) {
2250 if (GEP->hasAllConstantIndices())
2257 if (Info.isUnitStride() &&
2263 GEP->getType()->getPointerAddressSpace()))
2266 {TTI::OK_AnyValue, TTI::OP_None},
2267 {TTI::OK_AnyValue, TTI::OP_None}, {});
2284 if (ST->enableDefaultUnroll())
2294 if (L->getHeader()->getParent()->hasOptSize())
2298 L->getExitingBlocks(ExitingBlocks);
2300 << "Blocks: " << L->getNumBlocks() << "\n"
2301 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2305 if (ExitingBlocks.size() > 2)
2310 if (L->getNumBlocks() > 4)
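// Walk the loop body: any vector code or call makes runtime unrolling
// unattractive here, and the remaining instructions appear to feed a rough
// size estimate that gates unrolling.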
2320 for (auto *BB : L->getBlocks()) {
2321 for (auto &I : *BB) {
2324 if (I.getType()->isVectorTy())
2327 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
2362 Type *EltTy = cast<VectorType>(Ty)->getElementType();
2366 cast<VectorType>(Ty));
2392 return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
2398 if (ST->hasVendorXCVmem() && !ST->is64Bit())
2420 auto *VTy = dyn_cast<VectorType>(DataTy);
2421 if (!VTy || VTy->isScalableTy())
2429 if (VTy->getElementType()->isIntegerTy(8))
2430 if (VTy->getElementCount().getFixedValue() > 256)
2437 auto *VTy = dyn_cast<VectorType>(DataTy);
2438 if (!VTy || VTy->isScalableTy())
2452 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
2453 bool Considerable = false;
2454 AllowPromotionWithoutCommonHeader = false;
2455 if (!isa<SExtInst>(&I))
2457 Type *ConsideredSExtType =
2459 if (I.getType() != ConsideredSExtType)
2463 for (const User *U : I.users()) {
2465 Considerable = true;
2469 if (GEPInst->getNumOperands() > 2) {
2470 AllowPromotionWithoutCommonHeader = true;
2475 return Considerable;
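// canSplatOperand: which operand of a vector op may stay scalar and fold into
// a .vx/.vf/.vi form. The commutative arithmetic ops and compares listed
// first accept either operand; shifts, divisions, remainders, and selects
// only have a vector-scalar form for operand 1.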
2480 case Instruction::Add:
2481 case Instruction::Sub:
2482 case Instruction::Mul:
2483 case Instruction::And:
2484 case Instruction::Or:
2485 case Instruction::Xor:
2486 case Instruction::FAdd:
2487 case Instruction::FSub:
2488 case Instruction::FMul:
2489 case Instruction::FDiv:
2490 case Instruction::ICmp:
2491 case Instruction::FCmp:
2493 case Instruction::Shl:
2494 case Instruction::LShr:
2495 case Instruction::AShr:
2496 case Instruction::UDiv:
2497 case Instruction::SDiv:
2498 case Instruction::URem:
2499 case Instruction::SRem:
2500 case Instruction::Select:
2501 return Operand == 1;
2514 auto *II = dyn_cast<IntrinsicInst>(I);
2518 switch (II->getIntrinsicID()) {
2519 case Intrinsic::fma:
2520 case Intrinsic::vp_fma:
2521 case Intrinsic::fmuladd:
2522 case Intrinsic::vp_fmuladd:
2523 return Operand == 0 || Operand == 1;
2524 case Intrinsic::vp_shl:
2525 case Intrinsic::vp_lshr:
2526 case Intrinsic::vp_ashr:
2527 case Intrinsic::vp_udiv:
2528 case Intrinsic::vp_sdiv:
2529 case Intrinsic::vp_urem:
2530 case Intrinsic::vp_srem:
2531 case Intrinsic::ssub_sat:
2532 case Intrinsic::vp_ssub_sat:
2533 case Intrinsic::usub_sat:
2534 case Intrinsic::vp_usub_sat:
2535 case Intrinsic::vp_select:
2536 return Operand == 1;
2538 case Intrinsic::vp_add:
2539 case Intrinsic::vp_mul:
2540 case Intrinsic::vp_and:
2541 case Intrinsic::vp_or:
2542 case Intrinsic::vp_xor:
2543 case Intrinsic::vp_fadd:
2544 case Intrinsic::vp_fmul:
2545 case Intrinsic::vp_icmp:
2546 case Intrinsic::vp_fcmp:
2547 case Intrinsic::smin:
2548 case Intrinsic::vp_smin:
2549 case Intrinsic::umin:
2550 case Intrinsic::vp_umin:
2551 case Intrinsic::smax:
2552 case Intrinsic::vp_smax:
2553 case Intrinsic::umax:
2554 case Intrinsic::vp_umax:
2555 case Intrinsic::sadd_sat:
2556 case Intrinsic::vp_sadd_sat:
2557 case Intrinsic::uadd_sat:
2558 case Intrinsic::vp_uadd_sat:
2560 case Intrinsic::vp_sub:
2561 case Intrinsic::vp_fsub:
2562 case Intrinsic::vp_fdiv:
2563 return Operand == 0 || Operand == 1;
2584 if (!ST->sinkSplatOperands())
2587 for (auto OpIdx : enumerate(I->operands())) {
2591 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
2593 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
2602 if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
2607 for (Use &U : Op->uses()) {
2624 if (!ST->enableUnalignedScalarMem())
2627 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
2630 Options.AllowOverlappingLoads = true;
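// Load sizes track XLEN: RV64 expands memcmp with 8/4/2/1-byte loads and
// allows 3/5/6-byte overlapping tails, RV32 with 4/2/1-byte loads and a
// 3-byte tail.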
2634 Options.LoadSizes = {8, 4, 2, 1};
2635 Options.AllowedTailExpansions = {3, 5, 6};
2637 Options.LoadSizes = {4, 2, 1};
2638 Options.AllowedTailExpansions = {3};