#define DEBUG_TYPE "riscvtti"
26 "riscv-v-register-bit-width-lmul",
28 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
29 "by autovectorized code. Fractional LMULs are not supported."),
35 "Overrides result used for getMaximumVF query which is used "
36 "exclusively by SLP vectorizer."),
  size_t NumInstr = OpCodes.size();
    return LMULCost * NumInstr;
  for (auto Op : OpCodes) {
    case RISCV::VRGATHER_VI:
    case RISCV::VRGATHER_VV:
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
    case RISCV::VFREDOSUM_VS: {
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
118 "getIntImmCost can only estimate cost of materialising integers");
  auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
  if (!BO || !BO->hasOneUse())
  if (BO->getOpcode() != Instruction::Shl)
  if (!isa<ConstantInt>(BO->getOperand(1)))
  unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
  if (ShAmt == Trailing)

         "getIntImmCost can only estimate cost of materialising integers");
  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;

  case Instruction::GetElementPtr:
  case Instruction::Store: {
    if (Idx == 1 || !Inst)
    if (!getTLI()->allowsMemoryAccessForAlignment(
            ST->getPointerAddressSpace(), ST->getAlign()))
  case Instruction::Load:
  case Instruction::And:
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
    if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
    Takes12BitImm = true;
  case Instruction::Add:
    Takes12BitImm = true;
  case Instruction::Or:
  case Instruction::Xor:
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Mul:
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
    Takes12BitImm = true;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;

  if (Imm.getSignificantBits() <= 64 &&
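// getPopcntSupport: scalar popcount is considered fast with Zbb (or
// XCVbitmanip on RV32). shouldExpandReduction: multiply reductions have no
// single RVV reduction instruction, so they are expanded.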
  return ST->hasStdExtZbb() || (ST->hasVendorXCVbitmanip() && !ST->is64Bit())

  switch (II->getIntrinsicID()) {
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:
  unsigned Size = Mask.size();
  for (unsigned I = 0; I != Size; ++I) {
    if (static_cast<unsigned>(Mask[I]) == I)
    for (unsigned J = I + 1; J != Size; ++J)
      if (static_cast<unsigned>(Mask[J]) != J % I)

  return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
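// getShuffleCost: map common shuffle kinds onto RVV sequences -- vnsrl.wi for
// even/odd deinterleaves, vslideup/vslidedown for concatenation, splice and
// subvector insert/extract, vrgather.vv/vi for general permutes and reverse --
// and price them via getRISCVInstructionCost.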
  if (isa<FixedVectorType>(Tp)) {
    if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
      MVT EltTp = LT.second.getVectorElementType();
        if (Mask[0] == 0 || Mask[0] == 1) {
          if (equal(DeinterleaveMask, Mask))
            return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
        if (LT.second.getScalarSizeInBits() != 1 &&
          unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
          for (unsigned I = 0; I != NumSlides; ++I) {
            unsigned InsertIndex = SubVectorSize * (1 << I);
            std::pair<InstructionCost, MVT> DestLT =
      if (LT.second.isFixedLengthVector() && LT.first == 1 &&
          (LT.second.getScalarSizeInBits() != 8 ||
           LT.second.getVectorNumElements() <= 256)) {
            getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
      if (LT.second.isFixedLengthVector() && LT.first == 1 &&
          (LT.second.getScalarSizeInBits() != 8 ||
           LT.second.getVectorNumElements() <= 256)) {
        return 2 * IndexCost +
               getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
    if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
        LT.second.isFixedLengthVector() &&
        LT.second.getVectorElementType().getSizeInBits() ==
        LT.second.getVectorNumElements() <
            cast<FixedVectorType>(Tp)->getNumElements() &&
                    cast<FixedVectorType>(Tp)->getNumElements()) ==
            static_cast<unsigned>(*LT.first.getValue())) {
      unsigned NumRegs = *LT.first.getValue();
      unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
      for (unsigned I = 0, NumSrcRegs = divideCeil(Mask.size(), SubVF);
           I < NumSrcRegs; ++I) {
        bool IsSingleVector = true;
            Mask.slice(I * SubVF,
                       I == NumSrcRegs - 1 ? Mask.size() % SubVF : SubVF),
            SubMask.begin(), [&](int I) -> int {
              if (I == PoisonMaskElem)
                return PoisonMaskElem;
              bool SingleSubVector = I / VF == 0;
              IsSingleVector &= SingleSubVector;
              return (SingleSubVector ? 0 : 1) * SubVF + (I % VF) % SubVF;
                     static_cast<unsigned>(P.value()) == P.index();
                SubVecTy, SubMask, CostKind, 0, nullptr);

        SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
      if (MinVLen == MaxVLen &&
          SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
          SubLT.second.getSizeInBits() <= MinVLen)
           getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
           getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
           (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
                                        Instruction::InsertElement);
    if (LT.second.getScalarSizeInBits() == 1) {
             (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
           (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
                                         RISCV::VMV_X_S, RISCV::VMV_V_X,
           getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
           getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
    unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
    if (Index >= 0 && Index < 32)
      Opcodes[0] = RISCV::VSLIDEDOWN_VI;
    else if (Index < 0 && Index > -32)
      Opcodes[1] = RISCV::VSLIDEUP_VI;
    return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    if (LT.second.isFixedLengthVector())
      LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
    unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
    if (LT.second.isFixedLengthVector() &&
        isInt<5>(LT.second.getVectorNumElements() - 1))
      Opcodes[1] = RISCV::VRSUB_VI;
        getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    return LT.first * (LenCost + GatherCost + ExtendCost);
  if (isa<ScalableVectorType>(Ty))
      Ty, DemandedElts, Insert, Extract, CostKind);
  if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
    assert(LT.second.isFixedLengthVector());
        cast<FixedVectorType>(Ty)->getNumElements() *
        getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
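// getInterleavedMemoryOpCost: a legal segment load/store is priced per
// register group; otherwise the cost is the wide memory op plus the shuffles
// needed to (de)interleave the lanes.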
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (!UseMaskForCond && !UseMaskForGaps &&
      Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    auto *VTy = cast<VectorType>(VecTy);
    if (LT.second.isVector()) {
          VTy->getElementCount().divideCoefficientBy(Factor));
      if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
        return LT.first * Cost;
        CostKind, {TTI::OK_AnyValue, TTI::OP_None});
    unsigned NumLoads = getEstimatedVLFor(VTy);
    return NumLoads * MemOpCost;

  if (isa<ScalableVectorType>(VecTy))
  auto *FVTy = cast<FixedVectorType>(VecTy);
  unsigned VF = FVTy->getNumElements() / Factor;
  if (Opcode == Instruction::Load) {
    for (unsigned Index : Indices) {
                                     UseMaskForCond, UseMaskForGaps);
  assert(Opcode == Instruction::Store && "Opcode must be a store");
  return MemCost + ShuffleCost;
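// getGatherScatterOpCost and getStridedMemoryOpCost: indexed and strided
// accesses are modeled as one element access per lane, i.e. the estimated VL
// times the cost of a single memory op.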
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  if ((Opcode == Instruction::Load &&
      (Opcode == Instruction::Store &&
  auto &VTy = *cast<VectorType>(DataTy);
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
      (Opcode != Instruction::Load && Opcode != Instruction::Store))
  auto &VTy = *cast<VectorType>(DataTy);
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
  for (auto *Ty : Tys) {
    if (!Ty->isVectorTy())
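// Per-element instruction counts for vector intrinsics that are expanded or
// custom-lowered, keyed by intrinsic ID and element type.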
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::lrint, MVT::i32, 1},
    {Intrinsic::lrint, MVT::i64, 1},
    {Intrinsic::llrint, MVT::i64, 1},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::ctlz, MVT::i8, 19},
    {Intrinsic::ctlz, MVT::i16, 28},
    {Intrinsic::ctlz, MVT::i32, 31},
    {Intrinsic::ctlz, MVT::i64, 35},
    {Intrinsic::cttz, MVT::i8, 16},
    {Intrinsic::cttz, MVT::i16, 23},
    {Intrinsic::cttz, MVT::i32, 24},
    {Intrinsic::cttz, MVT::i64, 25},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
  case Intrinsic::VPID:                                                        \
#include "llvm/IR/VPIntrinsics.def"
#undef HELPER_MAP_VPID_TO_VPSD
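// getIntrinsicInstrCost: intrinsics that map to a single RVV instruction
// (min/max, saturating add/sub, fabs/sqrt, Zvbb counting ops, stepvector,
// the vp_* rounding ops) are priced directly; everything else falls back to
// the cost table above.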
  switch (ICA.getID()) {
  case Intrinsic::lrint:
  case Intrinsic::llrint:
    if (auto *VecTy = dyn_cast<VectorType>(ICA.getArgTypes()[0]);
        VecTy && VecTy->getElementType()->is16bitFPTy())
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
      switch (ICA.getID()) {
      case Intrinsic::sadd_sat:
        Op = RISCV::VSADD_VV;
      case Intrinsic::ssub_sat:
        Op = RISCV::VSSUBU_VV;
      case Intrinsic::uadd_sat:
        Op = RISCV::VSADDU_VV;
      case Intrinsic::usub_sat:
        Op = RISCV::VSSUBU_VV;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
      switch (ICA.getID()) {
      case Intrinsic::fabs:
        Op = RISCV::VFSGNJX_VV;
      case Intrinsic::sqrt:
        Op = RISCV::VFSQRT_V;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::cttz:
  case Intrinsic::ctlz:
  case Intrinsic::ctpop: {
    if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector()) {
      switch (ICA.getID()) {
      case Intrinsic::cttz:
      case Intrinsic::ctlz:
      case Intrinsic::ctpop:
        Op = RISCV::VCPOP_V;
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
  case Intrinsic::abs: {
          getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
  case Intrinsic::get_active_lane_mask: {
             getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
  case Intrinsic::stepvector: {
      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
             getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);
  case Intrinsic::experimental_cttz_elts: {
        cast<ConstantInt>(ICA.getArgs()[1])->isZero())
  case Intrinsic::vp_rint: {
      return Cost * LT.first;
  case Intrinsic::vp_nearbyint: {
      return Cost * LT.first;
  case Intrinsic::vp_ceil:
  case Intrinsic::vp_floor:
  case Intrinsic::vp_round:
  case Intrinsic::vp_roundeven:
  case Intrinsic::vp_roundtozero: {
      return Cost * LT.first;
  case Intrinsic::vp_fneg: {
    std::optional<unsigned> FOp =
  case Intrinsic::vp_select: {
  case Intrinsic::vp_merge:
  case Intrinsic::experimental_vp_splat: {
    return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
      LT.second.isVector()) {
    MVT EltTy = LT.second.getVectorElementType();
            ICA.getID(), EltTy))
      return LT.first * Entry->Cost;
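// getCastInstrCost: integer extends map onto vsext/vzext.vf2/4/8, FP
// widen/narrow onto vfwcvt/vfncvt chains (one step per halving or doubling of
// the element size), and FP<->int conversions onto the vfcvt/vfwcvt/vfncvt
// forms, with an extra hop through f32 for f16/bf16 element types.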
  bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
      Dst->getScalarSizeInBits() > ST->getELen())
  assert(ISD && "Invalid opcode");
    if (Src->getScalarSizeInBits() == 1) {
      return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
             DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
    if (Dst->getScalarSizeInBits() == 1) {
      return SrcLT.first *
                 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
  if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
                         SrcLT.second.getSizeInBits()) ||
                         DstLT.second.getSizeInBits()))
  assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
  int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
                (int)Log2_32(SrcLT.second.getScalarSizeInBits());
    if ((PowDiff < 1) || (PowDiff > 3))
    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
    unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
    unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
                                                   : RISCV::VFNCVT_F_F_W;
    for (; SrcEltSize != DstEltSize;) {
          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
    unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
        IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((SrcEltSize == 16) &&
                           cast<VectorType>(Dst)->getElementCount());
      std::pair<InstructionCost, MVT> VecF32LT =
          VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize)
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
      if ((SrcEltSize / 2) > DstEltSize) {
    unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
    unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
    unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    if ((DstEltSize == 16) &&
                           cast<VectorType>(Dst)->getElementCount());
      std::pair<InstructionCost, MVT> VecF32LT =
      Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
    if (DstEltSize == SrcEltSize)
      Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
    else if (DstEltSize > SrcEltSize) {
      if ((DstEltSize / 2) > SrcEltSize) {
                             cast<VectorType>(Dst)->getElementCount());
        unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
      Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
      Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
  if (isa<ScalableVectorType>(Ty)) {
  return cast<FixedVectorType>(Ty)->getNumElements();
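// getMinMaxReductionCost: one vred*/vfred* reduction plus the scalar move out
// (vmv.x.s / vfmv.f.s); when legalization splits the vector, each extra
// register group adds one pairwise min/max. maximum/minimum additionally pay
// for NaN handling (vmfne + vcpop.m).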
  if (IID == Intrinsic::umax || IID == Intrinsic::smin)
  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
    case Intrinsic::maximum:
        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
    case Intrinsic::minimum:
        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
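// getArithmeticReductionCost: i1 and/or/xor reductions become mask ops plus
// vcpop.m; integer and FP reductions use vred*/vfred* plus a scalar move, with
// one extra vector op per split register group.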
    std::optional<FastMathFlags> FMF,
  assert(ISD && "Invalid opcode");
    return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
    return (LT.first - 1) *
               getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
    return (LT.first - 1) *
               getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
    if ((LT.second.getVectorElementType() == MVT::f16 &&
        LT.second.getVectorElementType() == MVT::bf16)
      for (unsigned i = 0; i < LT.first.getValue(); i++)
      return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
  if (IsUnsigned && Opcode == Instruction::Add &&
      LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
           getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
  return (LT.first - 1) +
  if (!isa<VectorType>(Ty))
  return getConstantPoolLoadCost(Ty, CostKind);
  if (VT == MVT::Other)
  if (Opcode == Instruction::Store && OpInfo.isConstant())
  if (Src->isVectorTy() && LT.second.isVector() &&
      LT.second.getSizeInBits()))
  return Cost + BaseCost;
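// getCmpSelInstrCost: vector selects lower to mask logic (i1 elements) or
// vmerge.vvm; integer and FP compares map to vmslt/vmflt-style mask-producing
// compares; the cost of materializing constant operands is added on top.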
                                  Op1Info, Op2Info, I);
                                  Op1Info, Op2Info, I);
                                  Op1Info, Op2Info, I);
  auto GetConstantMatCost =
        if (OpInfo.isUniform())
        return getConstantPoolLoadCost(ValTy, CostKind);
    ConstantMatCost += GetConstantMatCost(Op1Info);
    ConstantMatCost += GetConstantMatCost(Op2Info);
  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
      return ConstantMatCost +
             getRISCVInstructionCost(
                 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
        MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
        return ConstantMatCost +
               getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
               LT.first * getRISCVInstructionCost(
                              {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
    return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
                                    Op1Info, Op2Info, I);
      return ConstantMatCost +
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
      return ConstantMatCost +
             getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
      return ConstantMatCost +
             getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
    return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
           U->getType()->isIntegerTy() &&
           !isa<ConstantData>(U->getOperand(1)) &&
           !isa<ConstantData>(U->getOperand(2));
                                  Op1Info, Op2Info, I);
  return Opcode == Instruction::PHI ? 0 : 1;
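// getVectorInstrCost: insert/extract of an element is modeled as a
// scalar<->vector move plus a slide, with the index wrapped to the legal type;
// fixed vectors whose type is not legal are costed as a stack store of all
// elements plus a reload.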
                                                 unsigned Index, Value *Op0,
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)
  if (!LT.second.isVector()) {
    auto *FixedVecTy = cast<FixedVectorType>(Val);
    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();
    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;
  if (LT.second.isScalableVector() && !LT.first.isValid())
                         cast<VectorType>(Val)->getElementCount());
    if (Opcode == Instruction::ExtractElement) {
      return ExtendCost + ExtractCost;
    return ExtendCost + InsertCost + TruncCost;
  unsigned BaseCost = 1;
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
    if (LT.second.isFixedLengthVector()) {
      unsigned Width = LT.second.getVectorNumElements();
      Index = Index % Width;
      unsigned EltSize = LT.second.getScalarSizeInBits();
      unsigned M1Max = *VLEN / EltSize;
      Index = Index % M1Max;
    else if (Opcode == Instruction::InsertElement)
      ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                          LT.second.isScalableVector()))) {
    if (Opcode == Instruction::ExtractElement)
    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
  return BaseCost + SlideCost;
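// getArithmeticInstrCost: pick the representative RVV opcode for the ISD node
// (vadd/vsll/vmul/vdiv/vrem/vfadd/...), add the cost of materializing constant
// operands, and for f16/bf16 without native support add the cost of promoting
// the operands to f32.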
  if (!LT.second.isVector())
  if ((LT.second.getVectorElementType() == MVT::f16 ||
       LT.second.getVectorElementType() == MVT::bf16) &&
    CastCost += LT.first * Args.size() *
    LT.second = PromotedVT;
  auto getConstantMatCost =
      return getConstantPoolLoadCost(Ty, CostKind);
    ConstantMatCost += getConstantMatCost(0, Op1Info);
    ConstantMatCost += getConstantMatCost(1, Op2Info);
  switch (ISDOpcode) {
    Op = RISCV::VADD_VV;
    Op = RISCV::VSLL_VV;
    Op = RISCV::VMUL_VV;
    Op = RISCV::VDIV_VV;
    Op = RISCV::VREM_VV;
    Op = RISCV::VFADD_VV;
    Op = RISCV::VFMUL_VV;
    Op = RISCV::VFDIV_VV;
    Op = RISCV::VFSGNJN_VV;
    return CastCost + ConstantMatCost +
  return CastCost + ConstantMatCost + LT.first * InstrCost;
  const auto *GEP = dyn_cast<GetElementPtrInst>(V);
  if (Info.isSameBase() && V != Base) {
    if (GEP->hasAllConstantIndices())
  if (Info.isUnitStride() &&
          GEP->getType()->getPointerAddressSpace()))
      {TTI::OK_AnyValue, TTI::OP_None},
      {TTI::OK_AnyValue, TTI::OP_None}, {});
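// getUnrollingPreferences: the more aggressive unrolling settings are applied
// only to small scalar loops -- optsize functions, loops with more than two
// exiting blocks or more than four blocks, and bodies containing vector
// operations are skipped, and calls are examined individually.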
  if (ST->enableDefaultUnroll())
  if (L->getHeader()->getParent()->hasOptSize())
  L->getExitingBlocks(ExitingBlocks);
             << "Blocks: " << L->getNumBlocks() << "\n"
             << "Exit blocks: " << ExitingBlocks.size() << "\n");
  if (ExitingBlocks.size() > 2)
  if (L->getNumBlocks() > 4)
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
  Type *EltTy = cast<VectorType>(Ty)->getElementType();
                                     cast<VectorType>(Ty));
  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
  auto *VTy = dyn_cast<VectorType>(DataTy);
  if (!VTy || VTy->isScalableTy())
  if (VTy->getElementType()->isIntegerTy(8))
    if (VTy->getElementCount().getFixedValue() > 256)
  auto *VTy = dyn_cast<VectorType>(DataTy);
  if (!VTy || VTy->isScalableTy())
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
  bool Considerable = false;
  AllowPromotionWithoutCommonHeader = false;
  if (!isa<SExtInst>(&I))
  Type *ConsideredSExtType =
  if (I.getType() != ConsideredSExtType)
  for (const User *U : I.users()) {
    Considerable = true;
    if (GEPInst->getNumOperands() > 2) {
      AllowPromotionWithoutCommonHeader = true;
  return Considerable;
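// canSplatOperand: report which operand index of a vector instruction or VP
// intrinsic can be folded into a .vx/.vf scalar-splat form.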
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::Select:
    return Operand == 1;
  auto *II = dyn_cast<IntrinsicInst>(I);
  switch (II->getIntrinsicID()) {
  case Intrinsic::fma:
  case Intrinsic::vp_fma:
  case Intrinsic::fmuladd:
  case Intrinsic::vp_fmuladd:
    return Operand == 0 || Operand == 1;
  case Intrinsic::vp_shl:
  case Intrinsic::vp_lshr:
  case Intrinsic::vp_ashr:
  case Intrinsic::vp_udiv:
  case Intrinsic::vp_sdiv:
  case Intrinsic::vp_urem:
  case Intrinsic::vp_srem:
  case Intrinsic::ssub_sat:
  case Intrinsic::vp_ssub_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::vp_usub_sat:
  case Intrinsic::vp_select:
    return Operand == 1;
  case Intrinsic::vp_add:
  case Intrinsic::vp_mul:
  case Intrinsic::vp_and:
  case Intrinsic::vp_or:
  case Intrinsic::vp_xor:
  case Intrinsic::vp_fadd:
  case Intrinsic::vp_fmul:
  case Intrinsic::vp_icmp:
  case Intrinsic::vp_fcmp:
  case Intrinsic::smin:
  case Intrinsic::vp_smin:
  case Intrinsic::umin:
  case Intrinsic::vp_umin:
  case Intrinsic::smax:
  case Intrinsic::vp_smax:
  case Intrinsic::umax:
  case Intrinsic::vp_umax:
  case Intrinsic::sadd_sat:
  case Intrinsic::vp_sadd_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::vp_uadd_sat:
  case Intrinsic::vp_sub:
  case Intrinsic::vp_fsub:
  case Intrinsic::vp_fdiv:
    return Operand == 0 || Operand == 1;
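// isProfitableToSinkOperands: collect splat-like operands whose uses can all
// fold the scalar directly, so the splat can be sunk next to its vector users;
// i1 splats are skipped.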
  if (!ST->sinkSplatOperands())
  for (auto OpIdx : enumerate(I->operands())) {
    Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
    if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
    if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
    for (Use &U : Op->uses()) {
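// enableMemCmpExpansion: expand memcmp inline only when unaligned scalar
// accesses are allowed and Zbb/Zbkb (or a compare against zero) make it cheap;
// the load sizes used depend on XLEN.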
  if (!(ST->enableUnalignedScalarMem() &&
        (ST->hasStdExtZbb() || ST->hasStdExtZbkb() || IsZeroCmp)))
  Options.AllowOverlappingLoads = true;
    Options.LoadSizes = {8, 4, 2, 1};
    Options.LoadSizes = {4, 2, 1};