18#include "llvm/IR/IntrinsicsRISCV.h"
25#define DEBUG_TYPE "riscvtti"
28 "riscv-v-register-bit-width-lmul",
30 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
31 "by autovectorized code. Fractional LMULs are not supported."),
37 "Overrides result used for getMaximumVF query which is used "
38 "exclusively by SLP vectorizer."),
43 cl::desc(
"Set the lower bound of a trip count to decide on "
44 "vectorization while tail-folding."),
53 size_t NumInstr = OpCodes.size();
58 return LMULCost * NumInstr;
60 for (
auto Op : OpCodes) {
62 case RISCV::VRGATHER_VI:
65 case RISCV::VRGATHER_VV:
68 case RISCV::VSLIDEUP_VI:
69 case RISCV::VSLIDEDOWN_VI:
72 case RISCV::VSLIDEUP_VX:
73 case RISCV::VSLIDEDOWN_VX:
76 case RISCV::VREDMAX_VS:
77 case RISCV::VREDMIN_VS:
78 case RISCV::VREDMAXU_VS:
79 case RISCV::VREDMINU_VS:
80 case RISCV::VREDSUM_VS:
81 case RISCV::VREDAND_VS:
82 case RISCV::VREDOR_VS:
83 case RISCV::VREDXOR_VS:
84 case RISCV::VFREDMAX_VS:
85 case RISCV::VFREDMIN_VS:
86 case RISCV::VFREDUSUM_VS: {
93 case RISCV::VFREDOSUM_VS: {
102 case RISCV::VFMV_F_S:
103 case RISCV::VFMV_S_F:
105 case RISCV::VMXOR_MM:
106 case RISCV::VMAND_MM:
107 case RISCV::VMANDN_MM:
108 case RISCV::VMNAND_MM:
110 case RISCV::VFIRST_M:
125 assert(Ty->isIntegerTy() &&
126 "getIntImmCost can only estimate cost of materialising integers");
149 if (!BO || !BO->hasOneUse())
152 if (BO->getOpcode() != Instruction::Shl)
163 if (ShAmt == Trailing)
180 if (!Cmp || !Cmp->isEquality())
196 if ((CmpC & Mask) != CmpC)
203 return NewCmpC >= -2048 && NewCmpC <= 2048;
210 assert(Ty->isIntegerTy() &&
211 "getIntImmCost can only estimate cost of materialising integers");
219 bool Takes12BitImm =
false;
220 unsigned ImmArgIdx = ~0U;
223 case Instruction::GetElementPtr:
228 case Instruction::Store: {
233 if (Idx == 1 || !Inst)
238 if (!getTLI()->allowsMemoryAccessForAlignment(
246 case Instruction::Load:
249 case Instruction::And:
251 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
254 if (Imm == UINT64_C(0xffffffff) &&
255 ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
258 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
260 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
263 if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
266 Takes12BitImm =
true;
268 case Instruction::Add:
269 Takes12BitImm =
true;
271 case Instruction::Or:
272 case Instruction::Xor:
274 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
276 Takes12BitImm =
true;
278 case Instruction::Mul:
280 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
283 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
286 Takes12BitImm =
true;
288 case Instruction::Sub:
289 case Instruction::Shl:
290 case Instruction::LShr:
291 case Instruction::AShr:
292 Takes12BitImm =
true;
303 if (Imm.getSignificantBits() <= 64 &&
326 return ST->hasVInstructions();
336 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
343 if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
344 Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
345 InputTypeA != InputTypeB || !InputTypeA->
isIntegerTy(8) ||
353 getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second,
CostKind);
360 switch (
II->getIntrinsicID()) {
364 case Intrinsic::vector_reduce_mul:
365 case Intrinsic::vector_reduce_fmul:
371 if (ST->hasVInstructions())
377 if (ST->hasVInstructions())
378 if (
unsigned MinVLen = ST->getRealMinVLen();
393 ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
396 (ST->hasVInstructions() &&
406RISCVTTIImpl::getConstantPoolLoadCost(
Type *Ty,
416 unsigned Size = Mask.size();
419 for (
unsigned I = 0;
I !=
Size; ++
I) {
420 if (
static_cast<unsigned>(Mask[
I]) ==
I)
426 for (
unsigned J =
I + 1; J !=
Size; ++J)
428 if (
static_cast<unsigned>(Mask[J]) != J %
I)
456 "Expected fixed vector type and non-empty mask");
459 unsigned NumOfDests =
divideCeil(Mask.size(), LegalNumElts);
463 if (NumOfDests <= 1 ||
465 Tp->getElementType()->getPrimitiveSizeInBits() ||
466 LegalNumElts >= Tp->getElementCount().getFixedValue())
469 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
472 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
476 unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
477 unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
478 unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
480 assert(NormalizedVF >= Mask.size() &&
481 "Normalized mask expected to be not shorter than original mask.");
486 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
487 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
490 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
493 Cost +=
TTI.getShuffleCost(
496 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
498 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
499 Cost +=
TTI.getShuffleCost(
502 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
525 if (!VLen || Mask.empty())
529 LegalVT =
TTI.getTypeLegalizationCost(
535 if (NumOfDests <= 1 ||
537 Tp->getElementType()->getPrimitiveSizeInBits() ||
541 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
544 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
550 unsigned NormalizedVF =
555 assert(NormalizedVF >= Mask.size() &&
556 "Normalized mask expected to be not shorter than original mask.");
562 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
563 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
566 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
571 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
573 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
575 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
582 if ((NumOfDestRegs > 2 && NumShuffles <=
static_cast<int>(NumOfDestRegs)) ||
583 (NumOfDestRegs <= 2 && NumShuffles < 4))
598 if (!
LT.second.isFixedLengthVector())
606 auto GetSlideOpcode = [&](
int SlideAmt) {
608 bool IsVI =
isUInt<5>(std::abs(SlideAmt));
610 return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
611 return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;
614 std::array<std::pair<int, int>, 2> SrcInfo;
618 if (SrcInfo[1].second == 0)
622 if (SrcInfo[0].second != 0) {
623 unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
624 FirstSlideCost = getRISCVInstructionCost(Opcode,
LT.second,
CostKind);
627 if (SrcInfo[1].first == -1)
628 return FirstSlideCost;
631 if (SrcInfo[1].second != 0) {
632 unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
633 SecondSlideCost = getRISCVInstructionCost(Opcode,
LT.second,
CostKind);
636 getRISCVInstructionCost(RISCV::VMERGE_VVM,
LT.second,
CostKind);
643 return FirstSlideCost + SecondSlideCost + MaskCost;
654 "Expected the Mask to match the return size if given");
656 "Expected the same scalar types");
665 FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
667 *
this, LT.second, ST->getRealVLen(),
669 if (VRegSplittingCost.
isValid())
670 return VRegSplittingCost;
675 if (Mask.size() >= 2) {
676 MVT EltTp = LT.second.getVectorElementType();
687 return 2 * LT.first * TLI->getLMULCost(LT.second);
689 if (Mask[0] == 0 || Mask[0] == 1) {
693 if (
equal(DeinterleaveMask, Mask))
694 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
699 if (LT.second.getScalarSizeInBits() != 1 &&
702 unsigned NumSlides =
Log2_32(Mask.size() / SubVectorSize);
704 for (
unsigned I = 0;
I != NumSlides; ++
I) {
705 unsigned InsertIndex = SubVectorSize * (1 <<
I);
710 std::pair<InstructionCost, MVT> DestLT =
715 Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
729 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
730 LT.second.getVectorNumElements() <= 256)) {
735 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second,
CostKind);
749 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
750 LT.second.getVectorNumElements() <= 256)) {
751 auto &
C = SrcTy->getContext();
752 auto EC = SrcTy->getElementCount();
757 return 2 * IndexCost +
758 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
777 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
805 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
806 if (std::optional<unsigned> VLen = ST->getRealVLen();
807 VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
808 SubLT.second.getSizeInBits() <= *VLen)
816 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second,
CostKind);
823 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second,
CostKind);
835 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
840 Instruction::InsertElement);
841 if (LT.second.getScalarSizeInBits() == 1) {
849 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
862 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
863 RISCV::VMV_X_S, RISCV::VMV_V_X,
872 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second,
CostKind);
878 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second,
CostKind);
884 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
885 if (Index >= 0 && Index < 32)
886 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
887 else if (Index < 0 && Index > -32)
888 Opcodes[1] = RISCV::VSLIDEUP_VI;
889 return LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
893 if (!LT.second.isVector())
899 if (SrcTy->getElementType()->isIntegerTy(1)) {
911 MVT ContainerVT = LT.second;
912 if (LT.second.isFixedLengthVector())
913 ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
915 if (ContainerVT.
bitsLE(M1VT)) {
925 if (LT.second.isFixedLengthVector())
927 LenCost =
isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
928 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
929 if (LT.second.isFixedLengthVector() &&
930 isInt<5>(LT.second.getVectorNumElements() - 1))
931 Opcodes[1] = RISCV::VRSUB_VI;
933 getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
934 return LT.first * (LenCost + GatherCost);
941 unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
943 getRISCVInstructionCost(M1Opcodes, M1VT,
CostKind) + 3;
947 getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT,
CostKind) * Ratio;
949 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second,
CostKind);
950 return FixedCost + LT.first * (GatherCost + SlideCost);
984 Ty, DemandedElts, Insert, Extract,
CostKind);
986 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
987 if (Ty->getScalarSizeInBits() == 1) {
997 assert(LT.second.isFixedLengthVector());
998 MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
1002 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second,
CostKind);
1015 switch (MICA.
getID()) {
1016 case Intrinsic::vp_load_ff: {
1017 EVT DataTypeVT = TLI->getValueType(
DL, DataTy);
1018 if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
1032 unsigned Opcode = MICA.
getID() == Intrinsic::masked_load ? Instruction::Load
1033 : Instruction::Store;
1048 bool UseMaskForCond,
bool UseMaskForGaps)
const {
1054 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
1058 if (LT.second.isVector()) {
1061 VTy->getElementCount().divideCoefficientBy(Factor));
1062 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
1063 TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
1068 if (ST->hasOptimizedSegmentLoadStore(Factor)) {
1071 MVT SubVecVT = getTLI()->getValueType(
DL, SubVecTy).getSimpleVT();
1072 Cost += Factor * TLI->getLMULCost(SubVecVT);
1073 return LT.first *
Cost;
1080 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
1081 unsigned NumLoads = getEstimatedVLFor(VTy);
1082 return NumLoads * MemOpCost;
1095 unsigned VF = FVTy->getNumElements() / Factor;
1102 if (Opcode == Instruction::Load) {
1104 for (
unsigned Index : Indices) {
1108 Mask.resize(VF * Factor, -1);
1112 Cost += ShuffleCost;
1130 UseMaskForCond, UseMaskForGaps);
1132 assert(Opcode == Instruction::Store &&
"Opcode must be a store");
1139 return MemCost + ShuffleCost;
1146 bool IsLoad = MICA.
getID() == Intrinsic::masked_gather ||
1147 MICA.
getID() == Intrinsic::vp_gather;
1148 unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
1155 if ((Opcode == Instruction::Load &&
1157 (Opcode == Instruction::Store &&
1167 {TTI::OK_AnyValue, TTI::OP_None},
I);
1168 unsigned NumLoads = getEstimatedVLFor(&VTy);
1169 return NumLoads * MemOpCost;
1175 unsigned Opcode = MICA.
getID() == Intrinsic::masked_expandload
1177 : Instruction::Store;
1181 bool IsLegal = (Opcode == Instruction::Store &&
1183 (Opcode == Instruction::Load &&
1207 if (Opcode == Instruction::Store)
1208 Opcodes.
append({RISCV::VCOMPRESS_VM});
1210 Opcodes.
append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
1212 LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1219 unsigned Opcode = MICA.
getID() == Intrinsic::experimental_vp_strided_load
1221 : Instruction::Store;
1239 {TTI::OK_AnyValue, TTI::OP_None},
I);
1240 unsigned NumLoads = getEstimatedVLFor(&VTy);
1241 return NumLoads * MemOpCost;
1251 for (
auto *Ty : Tys) {
1252 if (!Ty->isVectorTy())
1266 {Intrinsic::floor, MVT::f32, 9},
1267 {Intrinsic::floor, MVT::f64, 9},
1268 {Intrinsic::ceil, MVT::f32, 9},
1269 {Intrinsic::ceil, MVT::f64, 9},
1270 {Intrinsic::trunc, MVT::f32, 7},
1271 {Intrinsic::trunc, MVT::f64, 7},
1272 {Intrinsic::round, MVT::f32, 9},
1273 {Intrinsic::round, MVT::f64, 9},
1274 {Intrinsic::roundeven, MVT::f32, 9},
1275 {Intrinsic::roundeven, MVT::f64, 9},
1276 {Intrinsic::rint, MVT::f32, 7},
1277 {Intrinsic::rint, MVT::f64, 7},
1278 {Intrinsic::nearbyint, MVT::f32, 9},
1279 {Intrinsic::nearbyint, MVT::f64, 9},
1280 {Intrinsic::bswap, MVT::i16, 3},
1281 {Intrinsic::bswap, MVT::i32, 12},
1282 {Intrinsic::bswap, MVT::i64, 31},
1283 {Intrinsic::vp_bswap, MVT::i16, 3},
1284 {Intrinsic::vp_bswap, MVT::i32, 12},
1285 {Intrinsic::vp_bswap, MVT::i64, 31},
1286 {Intrinsic::vp_fshl, MVT::i8, 7},
1287 {Intrinsic::vp_fshl, MVT::i16, 7},
1288 {Intrinsic::vp_fshl, MVT::i32, 7},
1289 {Intrinsic::vp_fshl, MVT::i64, 7},
1290 {Intrinsic::vp_fshr, MVT::i8, 7},
1291 {Intrinsic::vp_fshr, MVT::i16, 7},
1292 {Intrinsic::vp_fshr, MVT::i32, 7},
1293 {Intrinsic::vp_fshr, MVT::i64, 7},
1294 {Intrinsic::bitreverse, MVT::i8, 17},
1295 {Intrinsic::bitreverse, MVT::i16, 24},
1296 {Intrinsic::bitreverse, MVT::i32, 33},
1297 {Intrinsic::bitreverse, MVT::i64, 52},
1298 {Intrinsic::vp_bitreverse, MVT::i8, 17},
1299 {Intrinsic::vp_bitreverse, MVT::i16, 24},
1300 {Intrinsic::vp_bitreverse, MVT::i32, 33},
1301 {Intrinsic::vp_bitreverse, MVT::i64, 52},
1302 {Intrinsic::ctpop, MVT::i8, 12},
1303 {Intrinsic::ctpop, MVT::i16, 19},
1304 {Intrinsic::ctpop, MVT::i32, 20},
1305 {Intrinsic::ctpop, MVT::i64, 21},
1306 {Intrinsic::ctlz, MVT::i8, 19},
1307 {Intrinsic::ctlz, MVT::i16, 28},
1308 {Intrinsic::ctlz, MVT::i32, 31},
1309 {Intrinsic::ctlz, MVT::i64, 35},
1310 {Intrinsic::cttz, MVT::i8, 16},
1311 {Intrinsic::cttz, MVT::i16, 23},
1312 {Intrinsic::cttz, MVT::i32, 24},
1313 {Intrinsic::cttz, MVT::i64, 25},
1314 {Intrinsic::vp_ctpop, MVT::i8, 12},
1315 {Intrinsic::vp_ctpop, MVT::i16, 19},
1316 {Intrinsic::vp_ctpop, MVT::i32, 20},
1317 {Intrinsic::vp_ctpop, MVT::i64, 21},
1318 {Intrinsic::vp_ctlz, MVT::i8, 19},
1319 {Intrinsic::vp_ctlz, MVT::i16, 28},
1320 {Intrinsic::vp_ctlz, MVT::i32, 31},
1321 {Intrinsic::vp_ctlz, MVT::i64, 35},
1322 {Intrinsic::vp_cttz, MVT::i8, 16},
1323 {Intrinsic::vp_cttz, MVT::i16, 23},
1324 {Intrinsic::vp_cttz, MVT::i32, 24},
1325 {Intrinsic::vp_cttz, MVT::i64, 25},
1332 switch (ICA.
getID()) {
1333 case Intrinsic::lrint:
1334 case Intrinsic::llrint:
1335 case Intrinsic::lround:
1336 case Intrinsic::llround: {
1340 if (ST->hasVInstructions() && LT.second.isVector()) {
1342 unsigned SrcEltSz =
DL.getTypeSizeInBits(SrcTy->getScalarType());
1343 unsigned DstEltSz =
DL.getTypeSizeInBits(RetTy->getScalarType());
1344 if (LT.second.getVectorElementType() == MVT::bf16) {
1345 if (!ST->hasVInstructionsBF16Minimal())
1348 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
1350 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
1351 }
else if (LT.second.getVectorElementType() == MVT::f16 &&
1352 !ST->hasVInstructionsF16()) {
1353 if (!ST->hasVInstructionsF16Minimal())
1356 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
1358 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
1360 }
else if (SrcEltSz > DstEltSz) {
1361 Ops = {RISCV::VFNCVT_X_F_W};
1362 }
else if (SrcEltSz < DstEltSz) {
1363 Ops = {RISCV::VFWCVT_X_F_V};
1365 Ops = {RISCV::VFCVT_X_F_V};
1370 if (SrcEltSz > DstEltSz)
1371 return SrcLT.first *
1372 getRISCVInstructionCost(
Ops, SrcLT.second,
CostKind);
1373 return LT.first * getRISCVInstructionCost(
Ops, LT.second,
CostKind);
1377 case Intrinsic::ceil:
1378 case Intrinsic::floor:
1379 case Intrinsic::trunc:
1380 case Intrinsic::rint:
1381 case Intrinsic::round:
1382 case Intrinsic::roundeven: {
1385 if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
1386 return LT.first * 8;
1389 case Intrinsic::umin:
1390 case Intrinsic::umax:
1391 case Intrinsic::smin:
1392 case Intrinsic::smax: {
1394 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
1397 if (ST->hasVInstructions() && LT.second.isVector()) {
1399 switch (ICA.
getID()) {
1400 case Intrinsic::umin:
1401 Op = RISCV::VMINU_VV;
1403 case Intrinsic::umax:
1404 Op = RISCV::VMAXU_VV;
1406 case Intrinsic::smin:
1407 Op = RISCV::VMIN_VV;
1409 case Intrinsic::smax:
1410 Op = RISCV::VMAX_VV;
1413 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1417 case Intrinsic::sadd_sat:
1418 case Intrinsic::ssub_sat:
1419 case Intrinsic::uadd_sat:
1420 case Intrinsic::usub_sat: {
1422 if (ST->hasVInstructions() && LT.second.isVector()) {
1424 switch (ICA.
getID()) {
1425 case Intrinsic::sadd_sat:
1426 Op = RISCV::VSADD_VV;
1428 case Intrinsic::ssub_sat:
1429 Op = RISCV::VSSUBU_VV;
1431 case Intrinsic::uadd_sat:
1432 Op = RISCV::VSADDU_VV;
1434 case Intrinsic::usub_sat:
1435 Op = RISCV::VSSUBU_VV;
1438 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1442 case Intrinsic::fma:
1443 case Intrinsic::fmuladd: {
1446 if (ST->hasVInstructions() && LT.second.isVector())
1448 getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second,
CostKind);
1451 case Intrinsic::fabs: {
1453 if (ST->hasVInstructions() && LT.second.isVector()) {
1459 if (LT.second.getVectorElementType() == MVT::bf16 ||
1460 (LT.second.getVectorElementType() == MVT::f16 &&
1461 !ST->hasVInstructionsF16()))
1462 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1467 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second,
CostKind);
1471 case Intrinsic::sqrt: {
1473 if (ST->hasVInstructions() && LT.second.isVector()) {
1476 MVT ConvType = LT.second;
1477 MVT FsqrtType = LT.second;
1480 if (LT.second.getVectorElementType() == MVT::bf16) {
1481 if (LT.second == MVT::nxv32bf16) {
1482 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1483 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1484 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1485 ConvType = MVT::nxv16f16;
1486 FsqrtType = MVT::nxv16f32;
1488 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1489 FsqrtOp = {RISCV::VFSQRT_V};
1490 FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
1492 }
else if (LT.second.getVectorElementType() == MVT::f16 &&
1493 !ST->hasVInstructionsF16()) {
1494 if (LT.second == MVT::nxv32f16) {
1495 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1496 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1497 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1498 ConvType = MVT::nxv16f16;
1499 FsqrtType = MVT::nxv16f32;
1501 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1502 FsqrtOp = {RISCV::VFSQRT_V};
1503 FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
1506 FsqrtOp = {RISCV::VFSQRT_V};
1509 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType,
CostKind) +
1510 getRISCVInstructionCost(ConvOp, ConvType,
CostKind));
1514 case Intrinsic::cttz:
1515 case Intrinsic::ctlz:
1516 case Intrinsic::ctpop: {
1518 if (ST->hasStdExtZvbb() && LT.second.isVector()) {
1520 switch (ICA.
getID()) {
1521 case Intrinsic::cttz:
1524 case Intrinsic::ctlz:
1527 case Intrinsic::ctpop:
1528 Op = RISCV::VCPOP_V;
1531 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1535 case Intrinsic::abs: {
1537 if (ST->hasVInstructions() && LT.second.isVector()) {
1541 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1546 case Intrinsic::get_active_lane_mask: {
1547 if (ST->hasVInstructions()) {
1556 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1562 case Intrinsic::stepvector: {
1566 if (ST->hasVInstructions())
1567 return getRISCVInstructionCost(RISCV::VID_V, LT.second,
CostKind) +
1569 getRISCVInstructionCost(RISCV::VADD_VX, LT.second,
CostKind);
1570 return 1 + (LT.first - 1);
1572 case Intrinsic::experimental_cttz_elts: {
1574 EVT ArgType = TLI->getValueType(
DL, ArgTy,
true);
1575 if (getTLI()->shouldExpandCttzElements(ArgType))
1592 case Intrinsic::experimental_vp_splat: {
1595 if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
1597 return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
1602 case Intrinsic::experimental_vp_splice: {
1610 case Intrinsic::fptoui_sat:
1611 case Intrinsic::fptosi_sat: {
1613 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
1618 if (!SrcTy->isVectorTy())
1621 if (!SrcLT.first.isValid() || !DstLT.first.isValid())
1640 if (ST->hasVInstructions() && RetTy->isVectorTy()) {
1642 LT.second.isVector()) {
1643 MVT EltTy = LT.second.getVectorElementType();
1645 ICA.
getID(), EltTy))
1646 return LT.first * Entry->Cost;
1659 if (ST->hasVInstructions() && PtrTy->
isVectorTy())
1677 if (ST->enablePExtCodeGen() &&
1685 if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
1686 Dst->getScalarSizeInBits() > ST->getELen())
1689 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1704 if (Src->getScalarSizeInBits() == 1) {
1709 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second,
CostKind) +
1710 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1716 if (Dst->getScalarSizeInBits() == 1) {
1722 return SrcLT.first *
1723 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1735 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1736 !SrcLT.first.isValid() || !DstLT.first.isValid() ||
1738 SrcLT.second.getSizeInBits()) ||
1740 DstLT.second.getSizeInBits()) ||
1741 SrcLT.first > 1 || DstLT.first > 1)
1745 assert((SrcLT.first == 1) && (DstLT.first == 1) &&
"Illegal type");
1747 int PowDiff = (int)
Log2_32(DstLT.second.getScalarSizeInBits()) -
1748 (int)
Log2_32(SrcLT.second.getScalarSizeInBits());
1752 if ((PowDiff < 1) || (PowDiff > 3))
1754 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1755 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1758 return getRISCVInstructionCost(
Op, DstLT.second,
CostKind);
1761 case ISD::FP_EXTEND:
1764 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1765 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1768 : (
ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
1769 : RISCV::VFNCVT_F_F_W;
1771 for (; SrcEltSize != DstEltSize;) {
1775 MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
1777 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1785 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1787 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1789 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1790 unsigned SrcEltSize = Src->getScalarSizeInBits();
1791 unsigned DstEltSize = Dst->getScalarSizeInBits();
1793 if ((SrcEltSize == 16) &&
1794 (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
1800 std::pair<InstructionCost, MVT> VecF32LT =
1803 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1808 if (DstEltSize == SrcEltSize)
1809 Cost += getRISCVInstructionCost(FCVT, DstLT.second,
CostKind);
1810 else if (DstEltSize > SrcEltSize)
1811 Cost += getRISCVInstructionCost(FWCVT, DstLT.second,
CostKind);
1816 MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
1817 Cost += getRISCVInstructionCost(FNCVT, VecVT,
CostKind);
1818 if ((SrcEltSize / 2) > DstEltSize) {
1829 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1830 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1831 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1832 unsigned SrcEltSize = Src->getScalarSizeInBits();
1833 unsigned DstEltSize = Dst->getScalarSizeInBits();
1836 if ((DstEltSize == 16) &&
1837 (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
1843 std::pair<InstructionCost, MVT> VecF32LT =
1846 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1851 if (DstEltSize == SrcEltSize)
1852 Cost += getRISCVInstructionCost(FCVT, DstLT.second,
CostKind);
1853 else if (DstEltSize > SrcEltSize) {
1854 if ((DstEltSize / 2) > SrcEltSize) {
1858 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1861 Cost += getRISCVInstructionCost(FWCVT, DstLT.second,
CostKind);
1863 Cost += getRISCVInstructionCost(FNCVT, DstLT.second,
CostKind);
1870unsigned RISCVTTIImpl::getEstimatedVLFor(
VectorType *Ty)
const {
1872 const unsigned EltSize =
DL.getTypeSizeInBits(Ty->getElementType());
1873 const unsigned MinSize =
DL.getTypeSizeInBits(Ty).getKnownMinValue();
1888 if (Ty->getScalarSizeInBits() > ST->getELen())
1892 if (Ty->getElementType()->isIntegerTy(1)) {
1896 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1902 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
1906 case Intrinsic::maximum:
1908 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1910 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1925 case Intrinsic::minimum:
1927 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1929 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1935 const unsigned EltTyBits =
DL.getTypeSizeInBits(DstTy);
1944 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1953 case Intrinsic::smax:
1954 SplitOp = RISCV::VMAX_VV;
1955 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1957 case Intrinsic::smin:
1958 SplitOp = RISCV::VMIN_VV;
1959 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1961 case Intrinsic::umax:
1962 SplitOp = RISCV::VMAXU_VV;
1963 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1965 case Intrinsic::umin:
1966 SplitOp = RISCV::VMINU_VV;
1967 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1969 case Intrinsic::maxnum:
1970 SplitOp = RISCV::VFMAX_VV;
1971 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1973 case Intrinsic::minnum:
1974 SplitOp = RISCV::VFMIN_VV;
1975 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1980 (LT.first > 1) ? (LT.first - 1) *
1981 getRISCVInstructionCost(SplitOp, LT.second,
CostKind)
1983 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1988 std::optional<FastMathFlags> FMF,
1994 if (Ty->getScalarSizeInBits() > ST->getELen())
1997 int ISD = TLI->InstructionOpcodeToISD(Opcode);
2005 Type *ElementTy = Ty->getElementType();
2010 if (LT.second == MVT::v1i1)
2011 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second,
CostKind) +
2029 return ((LT.first > 2) ? (LT.first - 2) : 0) *
2030 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second,
CostKind) +
2031 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
CostKind) +
2032 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) +
2041 return (LT.first - 1) *
2042 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second,
CostKind) +
2043 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) + 1;
2051 return (LT.first - 1) *
2052 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second,
CostKind) +
2053 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) +
2066 SplitOp = RISCV::VADD_VV;
2067 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
2070 SplitOp = RISCV::VOR_VV;
2071 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
2074 SplitOp = RISCV::VXOR_VV;
2075 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
2078 SplitOp = RISCV::VAND_VV;
2079 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
2083 if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
2084 LT.second.getScalarType() == MVT::bf16)
2088 for (
unsigned i = 0; i < LT.first.getValue(); i++)
2091 return getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2093 SplitOp = RISCV::VFADD_VV;
2094 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
2099 (LT.first > 1) ? (LT.first - 1) *
2100 getRISCVInstructionCost(SplitOp, LT.second,
CostKind)
2102 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2106 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *ValTy,
2117 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
2123 if (IsUnsigned && Opcode == Instruction::Add &&
2124 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
2128 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind);
2135 return (LT.first - 1) +
2142 assert(OpInfo.isConstant() &&
"non constant operand?");
2149 if (OpInfo.isUniform())
2155 return getConstantPoolLoadCost(Ty,
CostKind);
2164 EVT VT = TLI->getValueType(
DL, Src,
true);
2166 if (VT == MVT::Other)
2171 if (Opcode == Instruction::Store && OpInfo.isConstant())
2186 if (Src->
isVectorTy() && LT.second.isVector() &&
2188 LT.second.getSizeInBits()))
2198 if (ST->hasVInstructions() && LT.second.isVector() &&
2200 BaseCost *= TLI->getLMULCost(LT.second);
2201 return Cost + BaseCost;
2210 Op1Info, Op2Info,
I);
2214 Op1Info, Op2Info,
I);
2217 if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
2219 Op1Info, Op2Info,
I);
2221 auto GetConstantMatCost =
2223 if (OpInfo.isUniform())
2228 return getConstantPoolLoadCost(ValTy,
CostKind);
2233 ConstantMatCost += GetConstantMatCost(Op1Info);
2235 ConstantMatCost += GetConstantMatCost(Op2Info);
2238 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
2240 if (ValTy->getScalarSizeInBits() == 1) {
2244 return ConstantMatCost +
2246 getRISCVInstructionCost(
2247 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2251 return ConstantMatCost +
2252 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
2256 if (ValTy->getScalarSizeInBits() == 1) {
2262 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
2263 return ConstantMatCost +
2265 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
2267 LT.first * getRISCVInstructionCost(
2268 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2275 return ConstantMatCost +
2276 LT.first * getRISCVInstructionCost(
2277 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
2281 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
2285 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
2290 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
2295 return ConstantMatCost +
2296 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second,
CostKind);
2302 if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
2303 (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
2304 (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
2306 Op1Info, Op2Info,
I);
2315 return ConstantMatCost +
2316 LT.first * getRISCVInstructionCost(
2317 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
2324 return ConstantMatCost +
2326 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
2335 return ConstantMatCost +
2337 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second,
CostKind);
2348 ValTy->isIntegerTy() && !
I->user_empty()) {
2350 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
2351 U->getType()->isIntegerTy() &&
2352 !isa<ConstantData>(U->getOperand(1)) &&
2353 !isa<ConstantData>(U->getOperand(2));
2361 Op1Info, Op2Info,
I);
2368 return Opcode == Instruction::PHI ? 0 : 1;
2377 const Value *Op1)
const {
2387 if (Opcode != Instruction::ExtractElement &&
2388 Opcode != Instruction::InsertElement)
2395 if (!LT.second.isVector()) {
2404 Type *ElemTy = FixedVecTy->getElementType();
2405 auto NumElems = FixedVecTy->getNumElements();
2406 auto Align =
DL.getPrefTypeAlign(ElemTy);
2411 return Opcode == Instruction::ExtractElement
2412 ? StoreCost * NumElems + LoadCost
2413 : (StoreCost + LoadCost) * NumElems + StoreCost;
2417 if (LT.second.isScalableVector() && !LT.first.isValid())
2425 if (Opcode == Instruction::ExtractElement) {
2431 return ExtendCost + ExtractCost;
2441 return ExtendCost + InsertCost + TruncCost;
2447 unsigned BaseCost = 1;
2449 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2454 if (LT.second.isFixedLengthVector()) {
2455 unsigned Width = LT.second.getVectorNumElements();
2456 Index = Index % Width;
2461 if (
auto VLEN = ST->getRealVLen()) {
2462 unsigned EltSize = LT.second.getScalarSizeInBits();
2463 unsigned M1Max = *VLEN / EltSize;
2464 Index = Index % M1Max;
2470 else if (ST->hasVendorXRivosVisni() &&
isUInt<5>(Index) &&
2473 else if (Opcode == Instruction::InsertElement)
2481 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2482 LT.second.isScalableVector()))) {
2484 Align VecAlign =
DL.getPrefTypeAlign(Val);
2485 Align SclAlign =
DL.getPrefTypeAlign(ScalarType);
2490 if (Opcode == Instruction::ExtractElement)
2526 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2528 return BaseCost + SlideCost;
2534 unsigned Index)
const {
2543 assert(Index < EC.getKnownMinValue() &&
"Unexpected reverse index");
2545 EC.getKnownMinValue() - 1 - Index,
nullptr,
2572 if (!LT.second.isVector())
2578 unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
2580 if ((LT.second.getVectorElementType() == MVT::f16 ||
2581 LT.second.getVectorElementType() == MVT::bf16) &&
2582 TLI->getOperationAction(ISDOpcode, LT.second) ==
2584 MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
2588 CastCost += LT.first * Args.size() *
2596 LT.second = PromotedVT;
2599 auto getConstantMatCost =
2609 return getConstantPoolLoadCost(Ty,
CostKind);
2615 ConstantMatCost += getConstantMatCost(0, Op1Info);
2617 ConstantMatCost += getConstantMatCost(1, Op2Info);
2620 switch (ISDOpcode) {
2623 Op = RISCV::VADD_VV;
2628 Op = RISCV::VSLL_VV;
2633 Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
2638 Op = RISCV::VMUL_VV;
2642 Op = RISCV::VDIV_VV;
2646 Op = RISCV::VREM_VV;
2650 Op = RISCV::VFADD_VV;
2653 Op = RISCV::VFMUL_VV;
2656 Op = RISCV::VFDIV_VV;
2659 Op = RISCV::VFSGNJN_VV;
2664 return CastCost + ConstantMatCost +
2673 if (Ty->isFPOrFPVectorTy())
2675 return CastCost + ConstantMatCost + LT.first *
InstrCost;
2698 if (Info.isSameBase() && V !=
Base) {
2699 if (
GEP->hasAllConstantIndices())
2705 unsigned Stride =
DL.getTypeStoreSize(AccessTy);
2706 if (Info.isUnitStride() &&
2712 GEP->getType()->getPointerAddressSpace()))
2715 {TTI::OK_AnyValue, TTI::OP_None},
2716 {TTI::OK_AnyValue, TTI::OP_None}, {});
2733 if (ST->enableDefaultUnroll())
2743 if (L->getHeader()->getParent()->hasOptSize())
2747 L->getExitingBlocks(ExitingBlocks);
2749 <<
"Blocks: " << L->getNumBlocks() <<
"\n"
2750 <<
"Exit blocks: " << ExitingBlocks.
size() <<
"\n");
2754 if (ExitingBlocks.
size() > 2)
2759 if (L->getNumBlocks() > 4)
2767 for (
auto *BB : L->getBlocks()) {
2768 for (
auto &
I : *BB) {
2772 if (IsVectorized &&
I.getType()->isVectorTy())
2812 bool HasMask =
false;
2815 bool IsWrite) -> int64_t {
2816 if (
auto *TarExtTy =
2818 return TarExtTy->getIntParameter(0);
2824 case Intrinsic::riscv_vle_mask:
2825 case Intrinsic::riscv_vse_mask:
2826 case Intrinsic::riscv_vlseg2_mask:
2827 case Intrinsic::riscv_vlseg3_mask:
2828 case Intrinsic::riscv_vlseg4_mask:
2829 case Intrinsic::riscv_vlseg5_mask:
2830 case Intrinsic::riscv_vlseg6_mask:
2831 case Intrinsic::riscv_vlseg7_mask:
2832 case Intrinsic::riscv_vlseg8_mask:
2833 case Intrinsic::riscv_vsseg2_mask:
2834 case Intrinsic::riscv_vsseg3_mask:
2835 case Intrinsic::riscv_vsseg4_mask:
2836 case Intrinsic::riscv_vsseg5_mask:
2837 case Intrinsic::riscv_vsseg6_mask:
2838 case Intrinsic::riscv_vsseg7_mask:
2839 case Intrinsic::riscv_vsseg8_mask:
2842 case Intrinsic::riscv_vle:
2843 case Intrinsic::riscv_vse:
2844 case Intrinsic::riscv_vlseg2:
2845 case Intrinsic::riscv_vlseg3:
2846 case Intrinsic::riscv_vlseg4:
2847 case Intrinsic::riscv_vlseg5:
2848 case Intrinsic::riscv_vlseg6:
2849 case Intrinsic::riscv_vlseg7:
2850 case Intrinsic::riscv_vlseg8:
2851 case Intrinsic::riscv_vsseg2:
2852 case Intrinsic::riscv_vsseg3:
2853 case Intrinsic::riscv_vsseg4:
2854 case Intrinsic::riscv_vsseg5:
2855 case Intrinsic::riscv_vsseg6:
2856 case Intrinsic::riscv_vsseg7:
2857 case Intrinsic::riscv_vsseg8: {
2874 Ty = TarExtTy->getTypeParameter(0U);
2879 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
2880 unsigned VLIndex = RVVIInfo->VLOperand;
2881 unsigned PtrOperandNo = VLIndex - 1 - HasMask;
2889 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
2892 unsigned ElemSize = Ty->getScalarSizeInBits();
2896 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
2897 Alignment, Mask, EVL);
2900 case Intrinsic::riscv_vlse_mask:
2901 case Intrinsic::riscv_vsse_mask:
2902 case Intrinsic::riscv_vlsseg2_mask:
2903 case Intrinsic::riscv_vlsseg3_mask:
2904 case Intrinsic::riscv_vlsseg4_mask:
2905 case Intrinsic::riscv_vlsseg5_mask:
2906 case Intrinsic::riscv_vlsseg6_mask:
2907 case Intrinsic::riscv_vlsseg7_mask:
2908 case Intrinsic::riscv_vlsseg8_mask:
2909 case Intrinsic::riscv_vssseg2_mask:
2910 case Intrinsic::riscv_vssseg3_mask:
2911 case Intrinsic::riscv_vssseg4_mask:
2912 case Intrinsic::riscv_vssseg5_mask:
2913 case Intrinsic::riscv_vssseg6_mask:
2914 case Intrinsic::riscv_vssseg7_mask:
2915 case Intrinsic::riscv_vssseg8_mask:
2918 case Intrinsic::riscv_vlse:
2919 case Intrinsic::riscv_vsse:
2920 case Intrinsic::riscv_vlsseg2:
2921 case Intrinsic::riscv_vlsseg3:
2922 case Intrinsic::riscv_vlsseg4:
2923 case Intrinsic::riscv_vlsseg5:
2924 case Intrinsic::riscv_vlsseg6:
2925 case Intrinsic::riscv_vlsseg7:
2926 case Intrinsic::riscv_vlsseg8:
2927 case Intrinsic::riscv_vssseg2:
2928 case Intrinsic::riscv_vssseg3:
2929 case Intrinsic::riscv_vssseg4:
2930 case Intrinsic::riscv_vssseg5:
2931 case Intrinsic::riscv_vssseg6:
2932 case Intrinsic::riscv_vssseg7:
2933 case Intrinsic::riscv_vssseg8: {
2950 Ty = TarExtTy->getTypeParameter(0U);
2955 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
2956 unsigned VLIndex = RVVIInfo->VLOperand;
2957 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
2969 Alignment =
Align(1);
2976 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
2979 unsigned ElemSize = Ty->getScalarSizeInBits();
2983 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
2984 Alignment, Mask, EVL, Stride);
2987 case Intrinsic::riscv_vloxei_mask:
2988 case Intrinsic::riscv_vluxei_mask:
2989 case Intrinsic::riscv_vsoxei_mask:
2990 case Intrinsic::riscv_vsuxei_mask:
2991 case Intrinsic::riscv_vloxseg2_mask:
2992 case Intrinsic::riscv_vloxseg3_mask:
2993 case Intrinsic::riscv_vloxseg4_mask:
2994 case Intrinsic::riscv_vloxseg5_mask:
2995 case Intrinsic::riscv_vloxseg6_mask:
2996 case Intrinsic::riscv_vloxseg7_mask:
2997 case Intrinsic::riscv_vloxseg8_mask:
2998 case Intrinsic::riscv_vluxseg2_mask:
2999 case Intrinsic::riscv_vluxseg3_mask:
3000 case Intrinsic::riscv_vluxseg4_mask:
3001 case Intrinsic::riscv_vluxseg5_mask:
3002 case Intrinsic::riscv_vluxseg6_mask:
3003 case Intrinsic::riscv_vluxseg7_mask:
3004 case Intrinsic::riscv_vluxseg8_mask:
3005 case Intrinsic::riscv_vsoxseg2_mask:
3006 case Intrinsic::riscv_vsoxseg3_mask:
3007 case Intrinsic::riscv_vsoxseg4_mask:
3008 case Intrinsic::riscv_vsoxseg5_mask:
3009 case Intrinsic::riscv_vsoxseg6_mask:
3010 case Intrinsic::riscv_vsoxseg7_mask:
3011 case Intrinsic::riscv_vsoxseg8_mask:
3012 case Intrinsic::riscv_vsuxseg2_mask:
3013 case Intrinsic::riscv_vsuxseg3_mask:
3014 case Intrinsic::riscv_vsuxseg4_mask:
3015 case Intrinsic::riscv_vsuxseg5_mask:
3016 case Intrinsic::riscv_vsuxseg6_mask:
3017 case Intrinsic::riscv_vsuxseg7_mask:
3018 case Intrinsic::riscv_vsuxseg8_mask:
3021 case Intrinsic::riscv_vloxei:
3022 case Intrinsic::riscv_vluxei:
3023 case Intrinsic::riscv_vsoxei:
3024 case Intrinsic::riscv_vsuxei:
3025 case Intrinsic::riscv_vloxseg2:
3026 case Intrinsic::riscv_vloxseg3:
3027 case Intrinsic::riscv_vloxseg4:
3028 case Intrinsic::riscv_vloxseg5:
3029 case Intrinsic::riscv_vloxseg6:
3030 case Intrinsic::riscv_vloxseg7:
3031 case Intrinsic::riscv_vloxseg8:
3032 case Intrinsic::riscv_vluxseg2:
3033 case Intrinsic::riscv_vluxseg3:
3034 case Intrinsic::riscv_vluxseg4:
3035 case Intrinsic::riscv_vluxseg5:
3036 case Intrinsic::riscv_vluxseg6:
3037 case Intrinsic::riscv_vluxseg7:
3038 case Intrinsic::riscv_vluxseg8:
3039 case Intrinsic::riscv_vsoxseg2:
3040 case Intrinsic::riscv_vsoxseg3:
3041 case Intrinsic::riscv_vsoxseg4:
3042 case Intrinsic::riscv_vsoxseg5:
3043 case Intrinsic::riscv_vsoxseg6:
3044 case Intrinsic::riscv_vsoxseg7:
3045 case Intrinsic::riscv_vsoxseg8:
3046 case Intrinsic::riscv_vsuxseg2:
3047 case Intrinsic::riscv_vsuxseg3:
3048 case Intrinsic::riscv_vsuxseg4:
3049 case Intrinsic::riscv_vsuxseg5:
3050 case Intrinsic::riscv_vsuxseg6:
3051 case Intrinsic::riscv_vsuxseg7:
3052 case Intrinsic::riscv_vsuxseg8: {
3069 Ty = TarExtTy->getTypeParameter(0U);
3074 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3075 unsigned VLIndex = RVVIInfo->VLOperand;
3076 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
3089 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3092 unsigned ElemSize = Ty->getScalarSizeInBits();
3097 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3098 Align(1), Mask, EVL,
3107 if (Ty->isVectorTy()) {
3110 if ((EltTy->
isHalfTy() && !ST->hasVInstructionsF16()) ||
3116 if (
Size.isScalable() && ST->hasVInstructions())
3119 if (ST->useRVVForFixedLengthVectors())
3139 return std::max<unsigned>(1U, RegWidth.
getFixedValue() / ElemWidth);
3147 return ST->enableUnalignedVectorMem();
3153 if (ST->hasVendorXCVmem() && !ST->is64Bit())
3175 Align Alignment)
const {
3177 if (!VTy || VTy->isScalableTy())
3185 if (VTy->getElementType()->isIntegerTy(8))
3186 if (VTy->getElementCount().getFixedValue() > 256)
3187 return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
3188 ST->getMaxLMULForFixedLengthVectors();
3193 Align Alignment)
const {
3195 if (!VTy || VTy->isScalableTy())
3209 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
3210 bool Considerable =
false;
3211 AllowPromotionWithoutCommonHeader =
false;
3214 Type *ConsideredSExtType =
3216 if (
I.getType() != ConsideredSExtType)
3220 for (
const User *U :
I.users()) {
3222 Considerable =
true;
3226 if (GEPInst->getNumOperands() > 2) {
3227 AllowPromotionWithoutCommonHeader =
true;
3232 return Considerable;
3237 case Instruction::Add:
3238 case Instruction::Sub:
3239 case Instruction::Mul:
3240 case Instruction::And:
3241 case Instruction::Or:
3242 case Instruction::Xor:
3243 case Instruction::FAdd:
3244 case Instruction::FSub:
3245 case Instruction::FMul:
3246 case Instruction::FDiv:
3247 case Instruction::ICmp:
3248 case Instruction::FCmp:
3250 case Instruction::Shl:
3251 case Instruction::LShr:
3252 case Instruction::AShr:
3253 case Instruction::UDiv:
3254 case Instruction::SDiv:
3255 case Instruction::URem:
3256 case Instruction::SRem:
3257 case Instruction::Select:
3258 return Operand == 1;
3265 if (!
I->getType()->isVectorTy() || !ST->hasVInstructions())
3275 switch (
II->getIntrinsicID()) {
3276 case Intrinsic::fma:
3277 case Intrinsic::vp_fma:
3278 case Intrinsic::fmuladd:
3279 case Intrinsic::vp_fmuladd:
3280 return Operand == 0 || Operand == 1;
3281 case Intrinsic::vp_shl:
3282 case Intrinsic::vp_lshr:
3283 case Intrinsic::vp_ashr:
3284 case Intrinsic::vp_udiv:
3285 case Intrinsic::vp_sdiv:
3286 case Intrinsic::vp_urem:
3287 case Intrinsic::vp_srem:
3288 case Intrinsic::ssub_sat:
3289 case Intrinsic::vp_ssub_sat:
3290 case Intrinsic::usub_sat:
3291 case Intrinsic::vp_usub_sat:
3292 case Intrinsic::vp_select:
3293 return Operand == 1;
3295 case Intrinsic::vp_add:
3296 case Intrinsic::vp_mul:
3297 case Intrinsic::vp_and:
3298 case Intrinsic::vp_or:
3299 case Intrinsic::vp_xor:
3300 case Intrinsic::vp_fadd:
3301 case Intrinsic::vp_fmul:
3302 case Intrinsic::vp_icmp:
3303 case Intrinsic::vp_fcmp:
3304 case Intrinsic::smin:
3305 case Intrinsic::vp_smin:
3306 case Intrinsic::umin:
3307 case Intrinsic::vp_umin:
3308 case Intrinsic::smax:
3309 case Intrinsic::vp_smax:
3310 case Intrinsic::umax:
3311 case Intrinsic::vp_umax:
3312 case Intrinsic::sadd_sat:
3313 case Intrinsic::vp_sadd_sat:
3314 case Intrinsic::uadd_sat:
3315 case Intrinsic::vp_uadd_sat:
3317 case Intrinsic::vp_sub:
3318 case Intrinsic::vp_fsub:
3319 case Intrinsic::vp_fdiv:
3320 return Operand == 0 || Operand == 1;
3333 if (
I->isBitwiseLogicOp()) {
3334 if (!
I->getType()->isVectorTy()) {
3335 if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
3336 for (
auto &
Op :
I->operands()) {
3344 }
else if (
I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
3345 for (
auto &
Op :
I->operands()) {
3357 Ops.push_back(&Not);
3358 Ops.push_back(&InsertElt);
3366 if (!
I->getType()->isVectorTy() || !ST->hasVInstructions())
3374 if (!ST->sinkSplatOperands())
3400 for (
Use &U :
Op->uses()) {
3409 Ops.push_back(&
Op->getOperandUse(0));
3411 Use *InsertEltUse = &
Op->getOperandUse(0);
3414 Ops.push_back(&InsertElt->getOperandUse(1));
3415 Ops.push_back(InsertEltUse);
3427 if (!ST->enableUnalignedScalarMem())
3430 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
3433 Options.AllowOverlappingLoads =
true;
3434 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
3436 if (ST->is64Bit()) {
3437 Options.LoadSizes = {8, 4, 2, 1};
3438 Options.AllowedTailExpansions = {3, 5, 6};
3440 Options.LoadSizes = {4, 2, 1};
3441 Options.AllowedTailExpansions = {3};
3444 if (IsZeroCmp && ST->hasVInstructions()) {
3445 unsigned VLenB = ST->getRealMinVLen() / 8;
3448 unsigned MinSize = ST->getXLen() / 8 + 1;
3449 unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getStridedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
std::optional< unsigned > getMaxVScale() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
bool isLegalAddImmediate(int64_t imm) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isFPPredicate(Predicate P)
static bool isIntPredicate(Predicate P)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
A parsed version of the target data layout string in and methods for querying it.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Information for memory intrinsic cost model.
Align getAlignment() const
unsigned getAddressSpace() const
Type * getDataType() const
bool getVariableMask() const
Intrinsic::ID getID() const
const Instruction * getInst() const
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
unsigned getMinTripCountTailFoldingThreshold() const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) const
Return the cost of materializing an immediate for a value operand of a store instruction.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
bool hasActiveVectorLength() const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Get memory intrinsic cost based on arguments.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getStridedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
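An illustrative query against the public cost-model entry points listed above, assuming a TargetTransformInfo for the function is already available (e.g. from a FunctionAnalysisManager); the helper name vectorAddCost is hypothetical.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

// Ask for the reciprocal-throughput cost of a v8i32 add; on a RISC-V
// subtarget this routes through the overrides above.
static llvm::InstructionCost
vectorAddCost(const llvm::TargetTransformInfo &TTI, llvm::LLVMContext &Ctx) {
  auto *VecTy = llvm::FixedVectorType::get(llvm::Type::getInt32Ty(Ctx), 8);
  return TTI.getArithmeticInstrCost(
      llvm::Instruction::Add, VecTy,
      llvm::TargetTransformInfo::TCK_RecipThroughput);
}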
This class represents an analyzed expression in the program.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
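A small sketch of the fixed/scalable distinction made by the two constructors above.

#include "llvm/Support/TypeSize.h"

// TypeSize::getFixed(128) is exactly 128 bits; getScalable(128) means
// "128 bits times vscale", known only as a minimum at compile time.
static void typeSizeDemo() {
  llvm::TypeSize Fixed = llvm::TypeSize::getFixed(128);
  llvm::TypeSize Scalable = llvm::TypeSize::getScalable(128);
  uint64_t FixedBits = Fixed.getFixedValue();     // 128
  uint64_t MinBits = Scalable.getKnownMinValue(); // 128, per vscale
  (void)FixedBits; (void)MinBits;
}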
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given a vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
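A short sketch using the Type queries listed above; typeQueryDemo is an illustrative name.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

// Inspect a scalable <vscale x 8 x i1> mask type and widen its lanes to i32
// with getWithNewBitWidth, keeping the element count.
static void typeQueryDemo() {
  llvm::LLVMContext Ctx;
  auto *MaskTy = llvm::ScalableVectorType::get(llvm::Type::getInt1Ty(Ctx), 8);
  bool IsVec = MaskTy->isVectorTy();                   // true
  unsigned LaneBits = MaskTy->getScalarSizeInBits();   // 1
  llvm::Type *WideTy = MaskTy->getWithNewBitWidth(32); // <vscale x 8 x i32>
  (void)IsVec; (void)LaneBits; (void)WideTy;
}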
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
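A minimal sketch of the ElementCount helpers listed above.

#include "llvm/Support/TypeSize.h"

// ElementCount arithmetic: halve a scalable count and check divisibility
// without losing the scalable/fixed distinction.
static void elementCountDemo() {
  llvm::ElementCount EC = llvm::ElementCount::getScalable(8); // vscale x 8
  bool DivBy4 = EC.isKnownMultipleOf(4);                      // true
  llvm::ElementCount Half = EC.divideCoefficientBy(2);        // vscale x 4
  unsigned MinElts = Half.getKnownMinValue();                 // 4
  (void)DivBy4; (void)MinElts;
}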
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
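A sketch of the IR pattern matchers listed above; note it uses the IR PatternMatch form of m_ZeroInt (the listing also shows the GlobalISel MIPatternMatch variant), and only the operand shape is checked, not the shuffle mask.

#include "llvm/IR/PatternMatch.h"

// Recognise the shape of the canonical splat idiom: an insertelement of a
// scalar into some vector, feeding a shufflevector of any mask.
static bool isSplatShape(llvm::Value *V, llvm::Value *&SplattedVal) {
  using namespace llvm::PatternMatch;
  return match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(SplattedVal),
                                        m_ZeroInt()),
                            m_Value()));
}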
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
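A sketch of the cost-table idiom around CostTableLookup; the table contents and fallback value here are made up for illustration.

#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"

// Static per-(ISD opcode, MVT) cost table with a lookup; a null result means
// "no entry", letting the caller fall back to a generic estimate.
static unsigned lookupAddCost(llvm::MVT VT) {
  static const llvm::CostTblEntry Tbl[] = {
      {llvm::ISD::ADD, llvm::MVT::v4i32, 1},
      {llvm::ISD::ADD, llvm::MVT::v8i32, 2},
  };
  if (const auto *Entry = llvm::CostTableLookup(Tbl, llvm::ISD::ADD, VT))
    return Entry->Cost;
  return 4; // hypothetical fallback
}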
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
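For example, the signed 12-bit range check used for RISC-V I-type immediates (the wrapper name fitsInSImm12 is illustrative only).

#include "llvm/Support/MathExtras.h"

// isInt<12>(2047) is true, isInt<12>(2048) is false (range -2048..2047).
static bool fitsInSImm12(int64_t Imm) { return llvm::isInt<12>(Imm); }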
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
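A small sketch combining the two bit utilities above; getRunStart is an illustrative helper name.

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

// A shifted mask is one contiguous run of ones; once that is established,
// countr_zero gives the start of the run (0x0ff0 -> 4).
static bool getRunStart(uint64_t Mask, unsigned &Start) {
  if (!llvm::isShiftedMask_64(Mask))
    return false;
  Start = llvm::countr_zero(Mask);
  return true;
}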
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
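For example (illustrative wrapper only):

#include "llvm/Analysis/VectorUtils.h"

// createStrideMask(Start=0, Stride=2, VF=4) produces {0, 2, 4, 6}: the lanes
// of the first member of a two-way interleaved group.
static llvm::SmallVector<int, 16> evenLaneMask() {
  return llvm::createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
}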
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
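For example (illustrative wrapper only):

#include "llvm/Analysis/VectorUtils.h"

// createInterleaveMask(VF=4, NumVecs=2) produces {0, 4, 1, 5, 2, 6, 3, 7},
// i.e. the lanes of two 4-element vectors zipped together.
static llvm::SmallVector<int, 16> interleaveTwoByFour() {
  return llvm::createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
}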
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
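A minimal sketch of the Align/MaybeAlign pair described above.

#include "llvm/Support/Alignment.h"

// Align is a guaranteed non-zero power of two; MaybeAlign may be unset, and
// valueOrOne() degrades gracefully to 1 in that case.
static uint64_t effectiveAlignment(llvm::MaybeAlign MA) {
  llvm::Align A = MA.valueOrOne();
  return A.value();
}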
Information about a load/store intrinsic defined by the target.