#include "llvm/IR/IntrinsicsRISCV.h"

#define DEBUG_TYPE "riscvtti"

    "riscv-v-register-bit-width-lmul",
        "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
        "by autovectorized code. Fractional LMULs are not supported."),
        "Overrides result used for getMaximumVF query which is used "
        "exclusively by SLP vectorizer."),
    cl::desc("Set the lower bound of a trip count to decide on "
             "vectorization while tail-folding."),
size_t NumInstr = OpCodes.size();
return LMULCost * NumInstr;
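// Otherwise cost each opcode individually: gathers, slides and reductions in
// the switch below get costs that scale with LMUL or element count rather
// than a flat per-instruction cost.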
for (auto Op : OpCodes) {
  case RISCV::VRGATHER_VI:
  case RISCV::VRGATHER_VV:
  case RISCV::VSLIDEUP_VI:
  case RISCV::VSLIDEDOWN_VI:
  case RISCV::VSLIDEUP_VX:
  case RISCV::VSLIDEDOWN_VX:
  case RISCV::VREDMAX_VS:
  case RISCV::VREDMIN_VS:
  case RISCV::VREDMAXU_VS:
  case RISCV::VREDMINU_VS:
  case RISCV::VREDSUM_VS:
  case RISCV::VREDAND_VS:
  case RISCV::VREDOR_VS:
  case RISCV::VREDXOR_VS:
  case RISCV::VFREDMAX_VS:
  case RISCV::VFREDMIN_VS:
  case RISCV::VFREDUSUM_VS: {
  case RISCV::VFREDOSUM_VS: {
  case RISCV::VFMV_F_S:
  case RISCV::VFMV_S_F:
  case RISCV::VMXOR_MM:
  case RISCV::VMAND_MM:
  case RISCV::VMANDN_MM:
  case RISCV::VMNAND_MM:
  case RISCV::VFIRST_M:
assert(Ty->isIntegerTy() &&
       "getIntImmCost can only estimate cost of materialising integers");
if (!BO || !BO->hasOneUse())
if (BO->getOpcode() != Instruction::Shl)
if (ShAmt == Trailing)
if (!Cmp || !Cmp->isEquality())
if ((CmpC & Mask) != CmpC)
return NewCmpC >= -2048 && NewCmpC <= 2048;
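// getIntImmCostInst: an immediate is free when the using instruction can fold
// it directly; the switch below records which opcodes accept a 12-bit signed
// immediate or have dedicated Zb* forms.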
assert(Ty->isIntegerTy() &&
       "getIntImmCost can only estimate cost of materialising integers");
bool Takes12BitImm = false;
unsigned ImmArgIdx = ~0U;
case Instruction::GetElementPtr:
case Instruction::Store: {
  if (Idx == 1 || !Inst)
  if (!getTLI()->allowsMemoryAccessForAlignment(
case Instruction::Load:
case Instruction::And:
  if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
  if (Imm == UINT64_C(0xffffffff) &&
      ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
  if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
  if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
  if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
  Takes12BitImm = true;
case Instruction::Add:
  Takes12BitImm = true;
case Instruction::Or:
case Instruction::Xor:
  if (ST->hasStdExtZbs() && Imm.isPowerOf2())
  Takes12BitImm = true;
case Instruction::Mul:
  if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
  if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
  Takes12BitImm = true;
case Instruction::Sub:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
  Takes12BitImm = true;
if (Imm.getSignificantBits() <= 64 &&
return ST->hasVInstructions();
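// Partial-reduction costing: an add that accumulates an i8 x i8 multiply
// chain can map onto the Zvdot4a8i vdota4.vv instruction when available.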
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
if (Opcode == Instruction::FAdd)
if (!ST->hasStdExtZvdot4a8i() || ST->getELen() < 64 ||
    Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
    InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||
getRISCVInstructionCost(RISCV::VDOTA4_VV, LT.second, CostKind);
switch (II->getIntrinsicID()) {
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_fmul:
if (ST->hasVInstructions())
if (ST->hasVInstructions())
if (unsigned MinVLen = ST->getRealMinVLen();
    ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
(ST->hasVInstructions() &&
return (ST->hasAUIPCADDIFusion() && ST->hasLUIADDIFusion()) ? 1 : 2;
RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,
return getStaticDataAddrGenerationCost(CostKind) +
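// Helper: detect a mask that is the identity prefix <0 .. I-1> repeated
// across the whole vector (a repeated-concat mask).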
unsigned Size = Mask.size();
for (unsigned I = 0; I != Size; ++I) {
  if (static_cast<unsigned>(Mask[I]) == I)
  for (unsigned J = I + 1; J != Size; ++J)
    if (static_cast<unsigned>(Mask[J]) != J % I)
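// Shuffles wider than one vector register are costed by splitting the mask
// into per-register sub-shuffles and summing single-register shuffle costs.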
486 "Expected fixed vector type and non-empty mask");
489 unsigned NumOfDests =
divideCeil(Mask.size(), LegalNumElts);
493 if (NumOfDests <= 1 ||
495 Tp->getElementType()->getPrimitiveSizeInBits() ||
496 LegalNumElts >= Tp->getElementCount().getFixedValue())
499 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
502 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
506 unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
507 unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
508 unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
510 assert(NormalizedVF >= Mask.size() &&
511 "Normalized mask expected to be not shorter than original mask.");
516 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
517 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
520 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
523 Cost +=
TTI.getShuffleCost(
526 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
528 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
529 Cost +=
TTI.getShuffleCost(
532 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
if (!VLen || Mask.empty())
LegalVT = TTI.getTypeLegalizationCost(
if (NumOfDests <= 1 ||
    Tp->getElementType()->getPrimitiveSizeInBits() ||
unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
unsigned NormalizedVF =
assert(NormalizedVF >= Mask.size() &&
       "Normalized mask expected to be not shorter than original mask.");
    NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
    [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
      if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
          SingleOpTy, RegMask, CostKind, 0, nullptr);
    [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
          SingleOpTy, RegMask, CostKind, 0, nullptr);
if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
    (NumOfDestRegs <= 2 && NumShuffles < 4))
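// Shuffles that decompose into at most one slide per source operand plus a
// vmerge are costed as slide(s) + mask materialization + merge.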
if (!LT.second.isFixedLengthVector())
auto GetSlideOpcode = [&](int SlideAmt) {
  bool IsVI = isUInt<5>(std::abs(SlideAmt));
  return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
  return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;
std::array<std::pair<int, int>, 2> SrcInfo;
if (SrcInfo[1].second == 0)
if (SrcInfo[0].second != 0) {
  unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
  FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
if (SrcInfo[1].first == -1)
  return FirstSlideCost;
if (SrcInfo[1].second != 0) {
  unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
  SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
return FirstSlideCost + SecondSlideCost + MaskCost;
       "Expected the Mask to match the return size if given");
       "Expected the same scalar types");
    FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
    *this, LT.second, ST->getRealVLen(),
if (VRegSplittingCost.isValid())
  return VRegSplittingCost;
if (Mask.size() >= 2) {
  MVT EltTp = LT.second.getVectorElementType();
  return 2 * LT.first * TLI->getLMULCost(LT.second);
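// Deinterleaving the even or odd lanes of a two-source shuffle maps onto a
// single vnsrl.wi on the widened element type.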
if (Mask[0] == 0 || Mask[0] == 1) {
  if (equal(DeinterleaveMask, Mask))
    return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
if (LT.second.getScalarSizeInBits() != 1 &&
  unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
  for (unsigned I = 0; I != NumSlides; ++I) {
    unsigned InsertIndex = SubVectorSize * (1 << I);
    std::pair<InstructionCost, MVT> DestLT =
    Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                      LT.second.getVectorNumElements() <= 256)) {
  getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
                      LT.second.getVectorNumElements() <= 256)) {
  auto &C = SrcTy->getContext();
  auto EC = SrcTy->getElementCount();
  return 2 * IndexCost +
         getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
    SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
  if (std::optional<unsigned> VLen = ST->getRealVLen();
      VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
      SubLT.second.getSizeInBits() <= *VLen)
getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
(1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
     Instruction::InsertElement);
if (LT.second.getScalarSizeInBits() == 1) {
(1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
(1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
                              RISCV::VMV_X_S, RISCV::VMV_V_X,
getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
if (Index >= 0 && Index < 32)
  Opcodes[0] = RISCV::VSLIDEDOWN_VI;
else if (Index < 0 && Index > -32)
  Opcodes[1] = RISCV::VSLIDEUP_VI;
return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
if (!LT.second.isVector())
if (SrcTy->getElementType()->isIntegerTy(1)) {
MVT ContainerVT = LT.second;
if (LT.second.isFixedLengthVector())
  ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
if (ContainerVT.bitsLE(M1VT)) {
if (LT.second.isFixedLengthVector())
  LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
if (LT.second.isFixedLengthVector() &&
    isInt<5>(LT.second.getVectorNumElements() - 1))
  Opcodes[1] = RISCV::VRSUB_VI;
getRISCVInstructionCost(Opcodes, LT.second, CostKind);
return LT.first * (LenCost + GatherCost);
unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
getRISCVInstructionCost(M1Opcodes, M1VT, CostKind) + 3;
getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT, CostKind) * Ratio;
getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second, CostKind);
return FixedCost + LT.first * (GatherCost + SlideCost);
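// Scalarization overhead: building a fixed vector from scalars is modeled as
// a chain of vslide1down.vx operations; i1 vectors are first widened to i8.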
    Ty, DemandedElts, Insert, Extract, CostKind);
if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
  if (Ty->getScalarSizeInBits() == 1) {
  assert(LT.second.isFixedLengthVector());
  MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
  getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
switch (MICA.getID()) {
case Intrinsic::vp_load_ff: {
  EVT DataTypeVT = TLI->getValueType(DL, DataTy);
  if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
case Intrinsic::experimental_vp_strided_load:
case Intrinsic::experimental_vp_strided_store:
case Intrinsic::masked_compressstore:
case Intrinsic::masked_expandload:
case Intrinsic::vp_scatter:
case Intrinsic::vp_gather:
case Intrinsic::masked_scatter:
case Intrinsic::masked_gather:
case Intrinsic::vp_load:
case Intrinsic::vp_store:
case Intrinsic::masked_load:
case Intrinsic::masked_store:
unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? Instruction::Load
                                                          : Instruction::Store;
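// Interleaved (segment) memory accesses: legal vlsegN/vssegN forms are costed
// directly, otherwise fall back to a wide memory op plus shuffles.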
    bool UseMaskForCond, bool UseMaskForGaps) const {
if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
  if (LT.second.isVector()) {
        VTy->getElementCount().divideCoefficientBy(Factor));
    if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
        TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
      if (ST->hasOptimizedSegmentLoadStore(Factor)) {
        MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
        Cost += Factor * TLI->getLMULCost(SubVecVT);
        return LT.first * Cost;
      CostKind, {TTI::OK_AnyValue, TTI::OP_None});
  unsigned NumLoads = getEstimatedVLFor(VTy);
  return NumLoads * MemOpCost;
if (UseMaskForGaps) {
         "Indices should not contain duplicate elements");
  unsigned NumOfFields = Indices.size();
  bool IsTailGapOnly = NumOfFields > 1 && (NumOfFields == Indices.back() + 1);
  if (IsTailGapOnly &&
      NumOfFields <= TLI->getMaxSupportedInterleaveFactor()) {
    if (LT.second.isVector() &&
        FVTy->getElementCount().isKnownMultipleOf(Factor)) {
          FVTy->getElementType(),
          FVTy->getElementCount().divideCoefficientBy(Factor));
      if (TLI->isLegalInterleavedAccessType(SubVecTy, NumOfFields, Alignment,
        unsigned NumAccesses = getEstimatedVLFor(FVTy);
unsigned VF = FVTy->getNumElements() / Factor;
if (Opcode == Instruction::Load) {
  for (unsigned Index : Indices) {
    Mask.resize(VF * Factor, -1);
    Cost += ShuffleCost;
    UseMaskForCond, UseMaskForGaps);
assert(Opcode == Instruction::Store && "Opcode must be a store");
return MemCost + ShuffleCost;
bool IsLoad = MICA.getID() == Intrinsic::masked_gather ||
              MICA.getID() == Intrinsic::vp_gather;
unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
if ((Opcode == Instruction::Load &&
    (Opcode == Instruction::Store &&
unsigned NumLoads = getEstimatedVLFor(&VTy);
unsigned Opcode = MICA.getID() == Intrinsic::masked_expandload
                      ? Instruction::Load
                      : Instruction::Store;
bool IsLegal = (Opcode == Instruction::Store &&
               (Opcode == Instruction::Load &&
if (Opcode == Instruction::Store)
  Opcodes.append({RISCV::VCOMPRESS_VM});
  Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
unsigned Opcode = MICA.getID() == Intrinsic::experimental_vp_strided_load
                      ? Instruction::Load
                      : Instruction::Store;
    {TTI::OK_AnyValue, TTI::OP_None}, I);
unsigned NumLoads = getEstimatedVLFor(&VTy);
return NumLoads * MemOpCost;
for (auto *Ty : Tys) {
  if (!Ty->isVectorTy())
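// Cost table for vector intrinsics, keyed on the element type; used when the
// operation expands to a known-length instruction sequence.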
{Intrinsic::floor, MVT::f32, 9},
{Intrinsic::floor, MVT::f64, 9},
{Intrinsic::ceil, MVT::f32, 9},
{Intrinsic::ceil, MVT::f64, 9},
{Intrinsic::trunc, MVT::f32, 7},
{Intrinsic::trunc, MVT::f64, 7},
{Intrinsic::round, MVT::f32, 9},
{Intrinsic::round, MVT::f64, 9},
{Intrinsic::roundeven, MVT::f32, 9},
{Intrinsic::roundeven, MVT::f64, 9},
{Intrinsic::rint, MVT::f32, 7},
{Intrinsic::rint, MVT::f64, 7},
{Intrinsic::nearbyint, MVT::f32, 9},
{Intrinsic::nearbyint, MVT::f64, 9},
{Intrinsic::bswap, MVT::i16, 3},
{Intrinsic::bswap, MVT::i32, 12},
{Intrinsic::bswap, MVT::i64, 31},
{Intrinsic::vp_bswap, MVT::i16, 3},
{Intrinsic::vp_bswap, MVT::i32, 12},
{Intrinsic::vp_bswap, MVT::i64, 31},
{Intrinsic::vp_fshl, MVT::i8, 7},
{Intrinsic::vp_fshl, MVT::i16, 7},
{Intrinsic::vp_fshl, MVT::i32, 7},
{Intrinsic::vp_fshl, MVT::i64, 7},
{Intrinsic::vp_fshr, MVT::i8, 7},
{Intrinsic::vp_fshr, MVT::i16, 7},
{Intrinsic::vp_fshr, MVT::i32, 7},
{Intrinsic::vp_fshr, MVT::i64, 7},
{Intrinsic::bitreverse, MVT::i8, 17},
{Intrinsic::bitreverse, MVT::i16, 24},
{Intrinsic::bitreverse, MVT::i32, 33},
{Intrinsic::bitreverse, MVT::i64, 52},
{Intrinsic::vp_bitreverse, MVT::i8, 17},
{Intrinsic::vp_bitreverse, MVT::i16, 24},
{Intrinsic::vp_bitreverse, MVT::i32, 33},
{Intrinsic::vp_bitreverse, MVT::i64, 52},
{Intrinsic::ctpop, MVT::i8, 12},
{Intrinsic::ctpop, MVT::i16, 19},
{Intrinsic::ctpop, MVT::i32, 20},
{Intrinsic::ctpop, MVT::i64, 21},
{Intrinsic::ctlz, MVT::i8, 19},
{Intrinsic::ctlz, MVT::i16, 28},
{Intrinsic::ctlz, MVT::i32, 31},
{Intrinsic::ctlz, MVT::i64, 35},
{Intrinsic::cttz, MVT::i8, 16},
{Intrinsic::cttz, MVT::i16, 23},
{Intrinsic::cttz, MVT::i32, 24},
{Intrinsic::cttz, MVT::i64, 25},
{Intrinsic::vp_ctpop, MVT::i8, 12},
{Intrinsic::vp_ctpop, MVT::i16, 19},
{Intrinsic::vp_ctpop, MVT::i32, 20},
{Intrinsic::vp_ctpop, MVT::i64, 21},
{Intrinsic::vp_ctlz, MVT::i8, 19},
{Intrinsic::vp_ctlz, MVT::i16, 28},
{Intrinsic::vp_ctlz, MVT::i32, 31},
{Intrinsic::vp_ctlz, MVT::i64, 35},
{Intrinsic::vp_cttz, MVT::i8, 16},
{Intrinsic::vp_cttz, MVT::i16, 23},
{Intrinsic::vp_cttz, MVT::i32, 24},
{Intrinsic::vp_cttz, MVT::i64, 25},
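// Intrinsic-specific costing: intrinsics with a direct RVV lowering are
// priced as the short instruction sequences they expand to.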
switch (ICA.getID()) {
case Intrinsic::lrint:
case Intrinsic::llrint:
case Intrinsic::lround:
case Intrinsic::llround: {
  if (ST->hasVInstructions() && LT.second.isVector()) {
    unsigned SrcEltSz = DL.getTypeSizeInBits(SrcTy->getScalarType());
    unsigned DstEltSz = DL.getTypeSizeInBits(RetTy->getScalarType());
    if (LT.second.getVectorElementType() == MVT::bf16) {
      if (!ST->hasVInstructionsBF16Minimal())
        Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
        Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
    } else if (LT.second.getVectorElementType() == MVT::f16 &&
               !ST->hasVInstructionsF16()) {
      if (!ST->hasVInstructionsF16Minimal())
        Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
        Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
    } else if (SrcEltSz > DstEltSz) {
      Ops = {RISCV::VFNCVT_X_F_W};
    } else if (SrcEltSz < DstEltSz) {
      Ops = {RISCV::VFWCVT_X_F_V};
      Ops = {RISCV::VFCVT_X_F_V};
    if (SrcEltSz > DstEltSz)
      return SrcLT.first *
             getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
    return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
case Intrinsic::ceil:
case Intrinsic::floor:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::round:
case Intrinsic::roundeven: {
  if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
    return LT.first * 8;
case Intrinsic::umin:
case Intrinsic::umax:
case Intrinsic::smin:
case Intrinsic::smax: {
  if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
  if (ST->hasVInstructions() && LT.second.isVector()) {
    switch (ICA.getID()) {
    case Intrinsic::umin:
      Op = RISCV::VMINU_VV;
    case Intrinsic::umax:
      Op = RISCV::VMAXU_VV;
    case Intrinsic::smin:
      Op = RISCV::VMIN_VV;
    case Intrinsic::smax:
      Op = RISCV::VMAX_VV;
    return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat: {
  if (ST->hasVInstructions() && LT.second.isVector()) {
    switch (ICA.getID()) {
    case Intrinsic::sadd_sat:
      Op = RISCV::VSADD_VV;
    case Intrinsic::ssub_sat:
      Op = RISCV::VSSUB_VV;
    case Intrinsic::uadd_sat:
      Op = RISCV::VSADDU_VV;
    case Intrinsic::usub_sat:
      Op = RISCV::VSSUBU_VV;
    return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
case Intrinsic::fma:
case Intrinsic::fmuladd: {
  if (ST->hasVInstructions() && LT.second.isVector())
    getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
case Intrinsic::fabs: {
  if (ST->hasVInstructions() && LT.second.isVector()) {
    if (LT.second.getVectorElementType() == MVT::bf16 ||
        (LT.second.getVectorElementType() == MVT::f16 &&
         !ST->hasVInstructionsF16()))
      return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
    getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
case Intrinsic::sqrt: {
  if (ST->hasVInstructions() && LT.second.isVector()) {
    MVT ConvType = LT.second;
    MVT FsqrtType = LT.second;
    if (LT.second.getVectorElementType() == MVT::bf16) {
      if (LT.second == MVT::nxv32bf16) {
        ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
                  RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
        FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
        ConvType = MVT::nxv16f16;
        FsqrtType = MVT::nxv16f32;
        ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
        FsqrtOp = {RISCV::VFSQRT_V};
        FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
    } else if (LT.second.getVectorElementType() == MVT::f16 &&
               !ST->hasVInstructionsF16()) {
      if (LT.second == MVT::nxv32f16) {
        ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
                  RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
        FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
        ConvType = MVT::nxv16f16;
        FsqrtType = MVT::nxv16f32;
        ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
        FsqrtOp = {RISCV::VFSQRT_V};
        FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
      FsqrtOp = {RISCV::VFSQRT_V};
    return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
                       getRISCVInstructionCost(ConvOp, ConvType, CostKind));
case Intrinsic::cttz:
case Intrinsic::ctlz:
case Intrinsic::ctpop: {
  if (ST->hasStdExtZvbb() && LT.second.isVector()) {
    switch (ICA.getID()) {
    case Intrinsic::cttz:
    case Intrinsic::ctlz:
    case Intrinsic::ctpop:
      Op = RISCV::VCPOP_V;
    return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
case Intrinsic::abs: {
  if (ST->hasVInstructions() && LT.second.isVector()) {
    if (ST->hasStdExtZvabd())
      getRISCVInstructionCost({RISCV::VABS_V}, LT.second, CostKind);
    getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
case Intrinsic::fshl:
case Intrinsic::fshr: {
  if ((ST->hasStdExtZbb() || ST->hasStdExtZbkb()) && RetTy->isIntegerTy() &&
      (RetTy->getIntegerBitWidth() == 32 ||
       RetTy->getIntegerBitWidth() == 64) &&
      RetTy->getIntegerBitWidth() <= ST->getXLen()) {
case Intrinsic::masked_udiv:
case Intrinsic::masked_sdiv:
case Intrinsic::masked_urem:
case Intrinsic::masked_srem:
case Intrinsic::get_active_lane_mask: {
  if (ST->hasVInstructions()) {
    getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
case Intrinsic::stepvector: {
  if (ST->hasVInstructions())
    return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
           getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
  return 1 + (LT.first - 1);
case Intrinsic::vector_splice_left:
case Intrinsic::vector_splice_right: {
  if (ST->hasVInstructions() && LT.second.isVector()) {
    getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX},
case Intrinsic::experimental_cttz_elts: {
  EVT ArgType = TLI->getValueType(DL, ArgTy, true);
  if (getTLI()->shouldExpandCttzElements(ArgType))
case Intrinsic::experimental_vp_splice: {
case Intrinsic::fptoui_sat:
case Intrinsic::fptosi_sat: {
  bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
  if (!SrcTy->isVectorTy())
  if (!SrcLT.first.isValid() || !DstLT.first.isValid())
case Intrinsic::experimental_vector_extract_last_active: {
  unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
      TLI->getVectorIdxTy(getDataLayout()), MaskTy->getElementCount(),
      true, &VScaleRange);
  EltWidth = std::max(EltWidth, MaskTy->getScalarSizeInBits());
  if (StepLT.first > 1)
  unsigned Opcodes[] = {RISCV::VID_V, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
  Cost += MaskLT.first *
          getRISCVInstructionCost(RISCV::VCPOP_M, MaskLT.second, CostKind);
  Cost += StepLT.first *
          getRISCVInstructionCost(Opcodes, StepLT.second, CostKind);
  Cost += ValLT.first *
          getRISCVInstructionCost({RISCV::VSLIDEDOWN_VI, RISCV::VMV_X_S},
if (ST->hasVInstructions() && RetTy->isVectorTy()) {
    LT.second.isVector()) {
  MVT EltTy = LT.second.getVectorElementType();
      ICA.getID(), EltTy))
    return LT.first * Entry->Cost;
if (ST->hasVInstructions() && PtrTy->isVectorTy())
if (ST->hasStdExtP() &&
if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
    Dst->getScalarSizeInBits() > ST->getELen())
int ISD = TLI->InstructionOpcodeToISD(Opcode);
if (Src->getScalarSizeInBits() == 1) {
  return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
         DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
if (Dst->getScalarSizeInBits() == 1) {
  return SrcLT.first *
         getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
    !SrcLT.first.isValid() || !DstLT.first.isValid() ||
    SrcLT.second.getSizeInBits()) ||
    DstLT.second.getSizeInBits()) ||
    SrcLT.first > 1 || DstLT.first > 1)
assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
              (int)Log2_32(SrcLT.second.getScalarSizeInBits());
if ((PowDiff < 1) || (PowDiff > 3))
unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
return getRISCVInstructionCost(Op, DstLT.second, CostKind);
unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
    : RISCV::VFNCVT_F_F_W;
for (; SrcEltSize != DstEltSize;) {
  MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
      (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
    IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
    IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
unsigned SrcEltSize = Src->getScalarSizeInBits();
unsigned DstEltSize = Dst->getScalarSizeInBits();
if ((SrcEltSize == 16) &&
    (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
  std::pair<InstructionCost, MVT> VecF32LT =
      VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
if (DstEltSize == SrcEltSize)
  Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
else if (DstEltSize > SrcEltSize)
  Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
  MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
  Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
  if ((SrcEltSize / 2) > DstEltSize) {
unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
unsigned SrcEltSize = Src->getScalarSizeInBits();
unsigned DstEltSize = Dst->getScalarSizeInBits();
if ((DstEltSize == 16) &&
    (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
  std::pair<InstructionCost, MVT> VecF32LT =
  Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
if (DstEltSize == SrcEltSize)
  Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
else if (DstEltSize > SrcEltSize) {
  if ((DstEltSize / 2) > SrcEltSize) {
    unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
  Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
  Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
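// Estimate the dynamic VL for a vector type from the tuning vscale; used to
// scale the cost of accesses that scalarize to one operation per element.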
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
  const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
  const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
if (Ty->getScalarSizeInBits() > ST->getELen())
if (Ty->getElementType()->isIntegerTy(1)) {
if (IID == Intrinsic::umax || IID == Intrinsic::smin)
if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
case Intrinsic::maximum:
  Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
  Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
case Intrinsic::minimum:
  Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
  Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
case Intrinsic::smax:
  SplitOp = RISCV::VMAX_VV;
  Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
case Intrinsic::smin:
  SplitOp = RISCV::VMIN_VV;
  Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
case Intrinsic::umax:
  SplitOp = RISCV::VMAXU_VV;
  Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
case Intrinsic::umin:
  SplitOp = RISCV::VMINU_VV;
  Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
case Intrinsic::maxnum:
  SplitOp = RISCV::VFMAX_VV;
  Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
case Intrinsic::minnum:
  SplitOp = RISCV::VFMIN_VV;
  Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
(LT.first > 1) ? (LT.first - 1) *
                     getRISCVInstructionCost(SplitOp, LT.second, CostKind)
return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    std::optional<FastMathFlags> FMF,
if (Ty->getScalarSizeInBits() > ST->getELen())
int ISD = TLI->InstructionOpcodeToISD(Opcode);
Type *ElementTy = Ty->getElementType();
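// i1 reductions lower through vcpop.m on the mask: AND checks that every bit
// is set, OR that any bit is set, and XOR reduces to the parity (low bit) of
// the population count.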
if (LT.second == MVT::v1i1)
  return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
return ((LT.first > 2) ? (LT.first - 2) : 0) *
           getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
       getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
       getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
return (LT.first - 1) *
           getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
       getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
return (LT.first - 1) *
           getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
       getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
SplitOp = RISCV::VADD_VV;
Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
SplitOp = RISCV::VOR_VV;
Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
SplitOp = RISCV::VXOR_VV;
Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
SplitOp = RISCV::VAND_VV;
Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
    LT.second.getScalarType() == MVT::bf16)
for (unsigned i = 0; i < LT.first.getValue(); i++)
return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
SplitOp = RISCV::VFADD_VV;
Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
(LT.first > 1) ? (LT.first - 1) *
                     getRISCVInstructionCost(SplitOp, LT.second, CostKind)
return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
if (IsUnsigned && Opcode == Instruction::Add &&
    LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
  getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
return (LT.first - 1) +
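// Cost of materializing a stored constant: a uniform (splat) value is cheap
// to rematerialize; anything else may require a constant-pool load.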
assert(OpInfo.isConstant() && "non constant operand?");
if (OpInfo.isUniform())
return getConstantPoolLoadCost(Ty, CostKind);
EVT VT = TLI->getValueType(DL, Src, true);
if (VT == MVT::Other)
if (Opcode == Instruction::Store && OpInfo.isConstant())
if (Src->isVectorTy() && LT.second.isVector() &&
    LT.second.getSizeInBits()))
if (ST->hasVInstructions() && LT.second.isVector() &&
  BaseCost *= TLI->getLMULCost(LT.second);
return Cost + BaseCost;
    Op1Info, Op2Info, I);
    Op1Info, Op2Info, I);
    Op1Info, Op2Info, I);
auto GetConstantMatCost =
  if (OpInfo.isUniform())
  return getConstantPoolLoadCost(ValTy, CostKind);
ConstantMatCost += GetConstantMatCost(Op1Info);
ConstantMatCost += GetConstantMatCost(Op2Info);
if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
  if (CondTy->isVectorTy()) {
    return ConstantMatCost +
           getRISCVInstructionCost(
               {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
    MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
    return ConstantMatCost +
           getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
           LT.first * getRISCVInstructionCost(
               {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
    return ConstantMatCost +
           LT.first * getRISCVInstructionCost(
               {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
  return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
  return ConstantMatCost +
         getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
    Op1Info, Op2Info, I);
  return ConstantMatCost +
         LT.first * getRISCVInstructionCost(
             {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
  return ConstantMatCost +
         getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
  return ConstantMatCost +
         getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
       U->getType()->isIntegerTy() &&
       !isa<ConstantData>(U->getOperand(1)) &&
       !isa<ConstantData>(U->getOperand(2));
    Op1Info, Op2Info, I);
return Opcode == Instruction::PHI ? 0 : 1;
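// Insert/extract element: without usable vector registers these round-trip
// through the stack; otherwise they are modeled as a base cost plus a slide.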
if (Opcode != Instruction::ExtractElement &&
    Opcode != Instruction::InsertElement)
if (!LT.second.isVector()) {
  Type *ElemTy = FixedVecTy->getElementType();
  auto NumElems = FixedVecTy->getNumElements();
  auto Align = DL.getPrefTypeAlign(ElemTy);
  return Opcode == Instruction::ExtractElement
             ? StoreCost * NumElems + LoadCost
             : (StoreCost + LoadCost) * NumElems + StoreCost;
if (LT.second.isScalableVector() && !LT.first.isValid())
if (Opcode == Instruction::ExtractElement) {
  return ExtendCost + ExtractCost;
return ExtendCost + InsertCost + TruncCost;
unsigned BaseCost = 1;
unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
if (LT.second.isFixedLengthVector()) {
  unsigned Width = LT.second.getVectorNumElements();
  Index = Index % Width;
if (auto VLEN = ST->getRealVLen()) {
  unsigned EltSize = LT.second.getScalarSizeInBits();
  unsigned M1Max = *VLEN / EltSize;
  Index = Index % M1Max;
else if (Opcode == Instruction::InsertElement)
    ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
                        LT.second.isScalableVector()))) {
  Align VecAlign = DL.getPrefTypeAlign(Val);
  Align SclAlign = DL.getPrefTypeAlign(ScalarType);
  if (Opcode == Instruction::ExtractElement)
BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
return BaseCost + SlideCost;
    unsigned Index) const {
assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
    EC.getKnownMinValue() - 1 - Index, nullptr,
std::optional<InstructionCost>
if ((Opcode == Instruction::UDiv || Opcode == Instruction::URem) &&
  if (Opcode == Instruction::UDiv)
return std::nullopt;
if (std::optional<InstructionCost> CombinedCost =
        Op2Info, Args, CxtI))
  return *CombinedCost;
unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
if (!LT.second.isVector()) {
  if (TLI->isOperationLegalOrPromote(ISDOpcode, LT.second))
    if (const auto *Entry = CostTableLookup(DivTbl, ISDOpcode, LT.second))
      return Entry->Cost * LT.first;
if ((LT.second.getVectorElementType() == MVT::f16 ||
     LT.second.getVectorElementType() == MVT::bf16) &&
    TLI->getOperationAction(ISDOpcode, LT.second) ==
  MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
  CastCost += LT.first * Args.size() *
  LT.second = PromotedVT;
auto getConstantMatCost =
  return getConstantPoolLoadCost(Ty, CostKind);
ConstantMatCost += getConstantMatCost(0, Op1Info);
ConstantMatCost += getConstantMatCost(1, Op2Info);
switch (ISDOpcode) {
  Op = RISCV::VADD_VV;
  Op = RISCV::VSLL_VV;
  Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
  Op = RISCV::VMUL_VV;
  Op = RISCV::VDIV_VV;
  Op = RISCV::VREM_VV;
  Op = RISCV::VFADD_VV;
  Op = RISCV::VFMUL_VV;
  Op = RISCV::VFDIV_VV;
  Op = RISCV::VFSGNJN_VV;
return CastCost + ConstantMatCost +
if (Ty->isFPOrFPVectorTy())
return CastCost + ConstantMatCost + LT.first * InstrCost;
if (Info.isSameBase() && V != Base) {
  if (GEP->hasAllConstantIndices())
unsigned Stride = DL.getTypeStoreSize(AccessTy);
if (Info.isUnitStride() &&
    GEP->getType()->getPointerAddressSpace()))
    {TTI::OK_AnyValue, TTI::OP_None},
    {TTI::OK_AnyValue, TTI::OP_None}, {});
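// Runtime unrolling heuristics: only small loops with few exits and no vector
// code or calls in the body are considered.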
if (ST->enableDefaultUnroll())
if (L->getHeader()->getParent()->hasOptSize())
L->getExitingBlocks(ExitingBlocks);
            << "Blocks: " << L->getNumBlocks() << "\n"
            << "Exit blocks: " << ExitingBlocks.size() << "\n");
if (ExitingBlocks.size() > 2)
if (L->getNumBlocks() > 4)
for (auto *BB : L->getBlocks()) {
  for (auto &I : *BB) {
    if (IsVectorized && (I.getType()->isVectorTy() ||
      return V->getType()->isVectorTy();
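// Record the interesting memory operands of RVV load/store intrinsics
// (unit-stride, strided and indexed forms), including mask and EVL operands,
// for instrumentation.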
bool HasMask = false;
    bool IsWrite) -> int64_t {
  if (auto *TarExtTy =
    return TarExtTy->getIntParameter(0);
3032 case Intrinsic::riscv_vle_mask:
3033 case Intrinsic::riscv_vse_mask:
3034 case Intrinsic::riscv_vlseg2_mask:
3035 case Intrinsic::riscv_vlseg3_mask:
3036 case Intrinsic::riscv_vlseg4_mask:
3037 case Intrinsic::riscv_vlseg5_mask:
3038 case Intrinsic::riscv_vlseg6_mask:
3039 case Intrinsic::riscv_vlseg7_mask:
3040 case Intrinsic::riscv_vlseg8_mask:
3041 case Intrinsic::riscv_vsseg2_mask:
3042 case Intrinsic::riscv_vsseg3_mask:
3043 case Intrinsic::riscv_vsseg4_mask:
3044 case Intrinsic::riscv_vsseg5_mask:
3045 case Intrinsic::riscv_vsseg6_mask:
3046 case Intrinsic::riscv_vsseg7_mask:
3047 case Intrinsic::riscv_vsseg8_mask:
3050 case Intrinsic::riscv_vle:
3051 case Intrinsic::riscv_vse:
3052 case Intrinsic::riscv_vlseg2:
3053 case Intrinsic::riscv_vlseg3:
3054 case Intrinsic::riscv_vlseg4:
3055 case Intrinsic::riscv_vlseg5:
3056 case Intrinsic::riscv_vlseg6:
3057 case Intrinsic::riscv_vlseg7:
3058 case Intrinsic::riscv_vlseg8:
3059 case Intrinsic::riscv_vsseg2:
3060 case Intrinsic::riscv_vsseg3:
3061 case Intrinsic::riscv_vsseg4:
3062 case Intrinsic::riscv_vsseg5:
3063 case Intrinsic::riscv_vsseg6:
3064 case Intrinsic::riscv_vsseg7:
3065 case Intrinsic::riscv_vsseg8: {
  Ty = TarExtTy->getTypeParameter(0U);
const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
unsigned VLIndex = RVVIInfo->VLOperand;
unsigned PtrOperandNo = VLIndex - 1 - HasMask;
unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
unsigned ElemSize = Ty->getScalarSizeInBits();
Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                      Alignment, Mask, EVL);
3108 case Intrinsic::riscv_vlse_mask:
3109 case Intrinsic::riscv_vsse_mask:
3110 case Intrinsic::riscv_vlsseg2_mask:
3111 case Intrinsic::riscv_vlsseg3_mask:
3112 case Intrinsic::riscv_vlsseg4_mask:
3113 case Intrinsic::riscv_vlsseg5_mask:
3114 case Intrinsic::riscv_vlsseg6_mask:
3115 case Intrinsic::riscv_vlsseg7_mask:
3116 case Intrinsic::riscv_vlsseg8_mask:
3117 case Intrinsic::riscv_vssseg2_mask:
3118 case Intrinsic::riscv_vssseg3_mask:
3119 case Intrinsic::riscv_vssseg4_mask:
3120 case Intrinsic::riscv_vssseg5_mask:
3121 case Intrinsic::riscv_vssseg6_mask:
3122 case Intrinsic::riscv_vssseg7_mask:
3123 case Intrinsic::riscv_vssseg8_mask:
3126 case Intrinsic::riscv_vlse:
3127 case Intrinsic::riscv_vsse:
3128 case Intrinsic::riscv_vlsseg2:
3129 case Intrinsic::riscv_vlsseg3:
3130 case Intrinsic::riscv_vlsseg4:
3131 case Intrinsic::riscv_vlsseg5:
3132 case Intrinsic::riscv_vlsseg6:
3133 case Intrinsic::riscv_vlsseg7:
3134 case Intrinsic::riscv_vlsseg8:
3135 case Intrinsic::riscv_vssseg2:
3136 case Intrinsic::riscv_vssseg3:
3137 case Intrinsic::riscv_vssseg4:
3138 case Intrinsic::riscv_vssseg5:
3139 case Intrinsic::riscv_vssseg6:
3140 case Intrinsic::riscv_vssseg7:
3141 case Intrinsic::riscv_vssseg8: {
  Ty = TarExtTy->getTypeParameter(0U);
const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
unsigned VLIndex = RVVIInfo->VLOperand;
unsigned PtrOperandNo = VLIndex - 2 - HasMask;
  Alignment = Align(1);
unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
unsigned ElemSize = Ty->getScalarSizeInBits();
Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                      Alignment, Mask, EVL, Stride);
3195 case Intrinsic::riscv_vloxei_mask:
3196 case Intrinsic::riscv_vluxei_mask:
3197 case Intrinsic::riscv_vsoxei_mask:
3198 case Intrinsic::riscv_vsuxei_mask:
3199 case Intrinsic::riscv_vloxseg2_mask:
3200 case Intrinsic::riscv_vloxseg3_mask:
3201 case Intrinsic::riscv_vloxseg4_mask:
3202 case Intrinsic::riscv_vloxseg5_mask:
3203 case Intrinsic::riscv_vloxseg6_mask:
3204 case Intrinsic::riscv_vloxseg7_mask:
3205 case Intrinsic::riscv_vloxseg8_mask:
3206 case Intrinsic::riscv_vluxseg2_mask:
3207 case Intrinsic::riscv_vluxseg3_mask:
3208 case Intrinsic::riscv_vluxseg4_mask:
3209 case Intrinsic::riscv_vluxseg5_mask:
3210 case Intrinsic::riscv_vluxseg6_mask:
3211 case Intrinsic::riscv_vluxseg7_mask:
3212 case Intrinsic::riscv_vluxseg8_mask:
3213 case Intrinsic::riscv_vsoxseg2_mask:
3214 case Intrinsic::riscv_vsoxseg3_mask:
3215 case Intrinsic::riscv_vsoxseg4_mask:
3216 case Intrinsic::riscv_vsoxseg5_mask:
3217 case Intrinsic::riscv_vsoxseg6_mask:
3218 case Intrinsic::riscv_vsoxseg7_mask:
3219 case Intrinsic::riscv_vsoxseg8_mask:
3220 case Intrinsic::riscv_vsuxseg2_mask:
3221 case Intrinsic::riscv_vsuxseg3_mask:
3222 case Intrinsic::riscv_vsuxseg4_mask:
3223 case Intrinsic::riscv_vsuxseg5_mask:
3224 case Intrinsic::riscv_vsuxseg6_mask:
3225 case Intrinsic::riscv_vsuxseg7_mask:
3226 case Intrinsic::riscv_vsuxseg8_mask:
3229 case Intrinsic::riscv_vloxei:
3230 case Intrinsic::riscv_vluxei:
3231 case Intrinsic::riscv_vsoxei:
3232 case Intrinsic::riscv_vsuxei:
3233 case Intrinsic::riscv_vloxseg2:
3234 case Intrinsic::riscv_vloxseg3:
3235 case Intrinsic::riscv_vloxseg4:
3236 case Intrinsic::riscv_vloxseg5:
3237 case Intrinsic::riscv_vloxseg6:
3238 case Intrinsic::riscv_vloxseg7:
3239 case Intrinsic::riscv_vloxseg8:
3240 case Intrinsic::riscv_vluxseg2:
3241 case Intrinsic::riscv_vluxseg3:
3242 case Intrinsic::riscv_vluxseg4:
3243 case Intrinsic::riscv_vluxseg5:
3244 case Intrinsic::riscv_vluxseg6:
3245 case Intrinsic::riscv_vluxseg7:
3246 case Intrinsic::riscv_vluxseg8:
3247 case Intrinsic::riscv_vsoxseg2:
3248 case Intrinsic::riscv_vsoxseg3:
3249 case Intrinsic::riscv_vsoxseg4:
3250 case Intrinsic::riscv_vsoxseg5:
3251 case Intrinsic::riscv_vsoxseg6:
3252 case Intrinsic::riscv_vsoxseg7:
3253 case Intrinsic::riscv_vsoxseg8:
3254 case Intrinsic::riscv_vsuxseg2:
3255 case Intrinsic::riscv_vsuxseg3:
3256 case Intrinsic::riscv_vsuxseg4:
3257 case Intrinsic::riscv_vsuxseg5:
3258 case Intrinsic::riscv_vsuxseg6:
3259 case Intrinsic::riscv_vsuxseg7:
3260 case Intrinsic::riscv_vsuxseg8: {
  Ty = TarExtTy->getTypeParameter(0U);
const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
unsigned VLIndex = RVVIInfo->VLOperand;
unsigned PtrOperandNo = VLIndex - 2 - HasMask;
unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
unsigned ElemSize = Ty->getScalarSizeInBits();
Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
                                      Align(1), Mask, EVL,
if (Ty->isVectorTy()) {
  if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
if (Size.isScalable() && ST->hasVInstructions())
if (ST->useRVVForFixedLengthVectors())
return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
return ST->enableUnalignedVectorMem();
if (ST->hasVendorXCVmem() && !ST->is64Bit())
    Align Alignment) const {
if (!VTy || VTy->isScalableTy())
if (VTy->getElementType()->isIntegerTy(8))
  if (VTy->getElementCount().getFixedValue() > 256)
return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
       ST->getMaxLMULForFixedLengthVectors();
    Align Alignment) const {
if (!VTy || VTy->isScalableTy())
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
bool Considerable = false;
AllowPromotionWithoutCommonHeader = false;
Type *ConsideredSExtType =
if (I.getType() != ConsideredSExtType)
for (const User *U : I.users()) {
  Considerable = true;
  if (GEPInst->getNumOperands() > 2) {
    AllowPromotionWithoutCommonHeader = true;
return Considerable;
3445 case Instruction::Add:
3446 case Instruction::Sub:
3447 case Instruction::Mul:
3448 case Instruction::And:
3449 case Instruction::Or:
3450 case Instruction::Xor:
3451 case Instruction::FAdd:
3452 case Instruction::FSub:
3453 case Instruction::FMul:
3454 case Instruction::FDiv:
3455 case Instruction::ICmp:
3456 case Instruction::FCmp:
3458 case Instruction::Shl:
3459 case Instruction::LShr:
3460 case Instruction::AShr:
3461 case Instruction::UDiv:
3462 case Instruction::SDiv:
3463 case Instruction::URem:
3464 case Instruction::SRem:
3465 case Instruction::Select:
3466 return Operand == 1;
if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
switch (II->getIntrinsicID()) {
3484 case Intrinsic::fma:
3485 case Intrinsic::vp_fma:
3486 case Intrinsic::fmuladd:
3487 case Intrinsic::vp_fmuladd:
3488 return Operand == 0 || Operand == 1;
3489 case Intrinsic::vp_shl:
3490 case Intrinsic::vp_lshr:
3491 case Intrinsic::vp_ashr:
3492 case Intrinsic::vp_udiv:
3493 case Intrinsic::vp_sdiv:
3494 case Intrinsic::vp_urem:
3495 case Intrinsic::vp_srem:
3496 case Intrinsic::ssub_sat:
3497 case Intrinsic::vp_ssub_sat:
3498 case Intrinsic::usub_sat:
3499 case Intrinsic::vp_usub_sat:
3500 case Intrinsic::vp_select:
3501 return Operand == 1;
3503 case Intrinsic::vp_add:
3504 case Intrinsic::vp_mul:
3505 case Intrinsic::vp_and:
3506 case Intrinsic::vp_or:
3507 case Intrinsic::vp_xor:
3508 case Intrinsic::vp_fadd:
3509 case Intrinsic::vp_fmul:
3510 case Intrinsic::vp_icmp:
3511 case Intrinsic::vp_fcmp:
3512 case Intrinsic::smin:
3513 case Intrinsic::vp_smin:
3514 case Intrinsic::umin:
3515 case Intrinsic::vp_umin:
3516 case Intrinsic::smax:
3517 case Intrinsic::vp_smax:
3518 case Intrinsic::umax:
3519 case Intrinsic::vp_umax:
3520 case Intrinsic::sadd_sat:
3521 case Intrinsic::vp_sadd_sat:
3522 case Intrinsic::uadd_sat:
3523 case Intrinsic::vp_uadd_sat:
3525 case Intrinsic::vp_sub:
3526 case Intrinsic::vp_fsub:
3527 case Intrinsic::vp_fdiv:
3528 return Operand == 0 || Operand == 1;
if (I->isBitwiseLogicOp()) {
  if (!I->getType()->isVectorTy()) {
    if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
      for (auto &Op : I->operands()) {
  } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
    for (auto &Op : I->operands()) {
      Ops.push_back(&Not);
      Ops.push_back(&InsertElt);
if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
if (!ST->sinkSplatOperands())
for (Use &U : Op->uses()) {
  Use *InsertEltUse = &Op->getOperandUse(0);
  Ops.push_back(&InsertElt->getOperandUse(1));
  Ops.push_back(InsertEltUse);
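// memcmp expansion options: allow overlapping loads, and for zero-compares
// with vector support use whole-register compares from XLEN+1 bytes up to
// VLEN * the maximum fixed-length LMUL.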
if (!ST->enableUnalignedScalarMem())
if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
Options.AllowOverlappingLoads = true;
Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
if (ST->is64Bit()) {
  Options.LoadSizes = {8, 4, 2, 1};
  Options.AllowedTailExpansions = {3, 5, 6};
  Options.LoadSizes = {4, 2, 1};
  Options.AllowedTailExpansions = {3};
if (IsZeroCmp && ST->hasVInstructions()) {
  unsigned VLenB = ST->getRealMinVLen() / 8;
  unsigned MinSize = ST->getXLen() / 8 + 1;
  unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
if (I->getOpcode() == Instruction::Or &&
if (I->getOpcode() == Instruction::Add ||
    I->getOpcode() == Instruction::Sub)
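// InstCombine hook: a vmv.v.x splat whose only users are bitcasts to a wider
// element type is rewritten as a single splat of the wider element with a
// correspondingly scaled VL.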
std::optional<Instruction *>
if (II.user_empty())
const APInt *Scalar;
return U->getType() == TargetVecTy && match(U, m_BitCast(m_Value()));
unsigned TargetEltBW = DL.getTypeSizeInBits(TargetVecTy->getElementType());
unsigned SourceEltBW = DL.getTypeSizeInBits(SourceVecTy->getElementType());
if (TargetEltBW % SourceEltBW)
unsigned TargetScale = TargetEltBW / SourceEltBW;
if (VL % TargetScale || TargetScale == 1)
Type *VLTy = II.getOperand(2)->getType();
ElementCount SourceEC = SourceVecTy->getElementCount();
unsigned NewEltBW = SourceEltBW * TargetScale;
    !DL.fitsInLegalInteger(NewEltBW))
if (!TLI->isLegalElementTypeForRVV(TLI->getValueType(DL, NewEltTy)))
assert(SourceVecTy->canLosslesslyBitCastTo(RetTy) &&
       "Lossless bitcast between types expected");
    RetTy, Intrinsic::riscv_vmv_v_x,
    {PoisonValue::get(RetTy), ConstantInt::get(NewEltTy, NewScalar),
     ConstantInt::get(VLTy, VL / TargetScale)}),
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
size_t size() const
size - Get the array size.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
LLVM_ABI StringRef getKindAsString() const
Return the attribute's kind as a string.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
std::optional< unsigned > getMaxVScale() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
bool isLegalAddImmediate(int64_t imm) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses. Each FCMP predicate is a four-bit truth mask over the conditions (unordered, less than, greater than, equal), shown below as U L G E.
@ FCMP_OEQ    0 0 0 1   True if ordered and equal
@ FCMP_TRUE   1 1 1 1   Always true (always folded)
@ ICMP_SLT    signed less than
@ FCMP_OLT    0 1 0 0   True if ordered and less than
@ FCMP_ULE    1 1 0 1   True if unordered, less than, or equal
@ FCMP_OGT    0 0 1 0   True if ordered and greater than
@ FCMP_OGE    0 0 1 1   True if ordered and greater than or equal
@ FCMP_ULT    1 1 0 0   True if unordered or less than
@ FCMP_ONE    0 1 1 0   True if ordered and operands are unequal
@ FCMP_UEQ    1 0 0 1   True if unordered or equal
@ FCMP_UGT    1 0 1 0   True if unordered or greater than
@ FCMP_OLE    0 1 0 1   True if ordered and less than or equal
@ FCMP_ORD    0 1 1 1   True if ordered (no nans)
@ FCMP_UNE    1 1 1 0   True if unordered or not equal
@ FCMP_UGE    1 0 1 1   True if unordered, greater than, or equal
@ FCMP_FALSE  0 0 0 0   Always false (always folded)
@ FCMP_UNO    1 0 0 0   True if unordered: isnan(X) | isnan(Y)
static bool isFPPredicate(Predicate P)
static bool isIntPredicate(Predicate P)
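The four-bit patterns above act as a truth mask, so compound predicates are bitwise ORs of the primitive ones. A minimal standalone sketch (constants redefined locally for illustration; the real values live in CmpInst::Predicate):

// Illustrative sketch of the FCMP bit encoding (U L G E).
enum FCmpBits : unsigned {
  FCMP_FALSE = 0b0000, // always false
  FCMP_OEQ   = 0b0001, // ordered and equal
  FCMP_OGT   = 0b0010, // ordered and greater than
  FCMP_OLT   = 0b0100, // ordered and less than
  FCMP_UNO   = 0b1000, // unordered: isnan(X) | isnan(Y)
  FCMP_ULE   = FCMP_UNO | FCMP_OLT | FCMP_OEQ, // 0b1101: unordered, less than, or equal
  FCMP_TRUE  = 0b1111, // always true
};
static_assert(FCMP_ULE == 0b1101, "compound predicates are ORs of the primitive bits");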
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
This class represents a range of values.
A parsed version of the target data layout string in and methods for querying it.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
The core instruction combiner logic.
const DataLayout & getDataLayout() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Information for memory intrinsic cost model.
Align getAlignment() const
unsigned getAddressSpace() const
Type * getDataType() const
bool getVariableMask() const
Intrinsic::ID getID() const
const Instruction * getInst() const
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool shouldCopyAttributeWhenOutliningFrom(const Function *Caller, const Attribute &Attr) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
InstructionCost getStridedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
unsigned getMinTripCountTailFoldingThreshold() const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) const
Return the cost of materializing an immediate for a value operand of a store instruction.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
std::optional< InstructionCost > getCombinedArithmeticInstructionCost(unsigned ISDOpcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI) const
Check to see if this instruction is expected to be combined to a simpler operation during/before lowering.
bool hasActiveVectorLength() const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat operand.
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
InstructionCost getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded into a target instruction during instruction selection.
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Get memory intrinsic cost based on arguments.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Estimate the overhead of scalarizing an instruction.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector register.
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
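These LMUL-oriented helpers all rest on the same RVV identity: VLMAX = (VLEN / SEW) * LMUL, with LMUL derived from the type's minimum size in bits divided by RVVBitsPerBlock (64). A standalone sketch of that arithmetic, using a hypothetical vlmax helper rather than the in-tree one:

// VectorBits = VLEN, EltSize = SEW, MinSize = the type's minimum size in bits.
// Multiplying before dividing keeps fractional LMULs (1/2, 1/4, 1/8) exact.
constexpr unsigned RVVBitsPerBlock = 64; // architectural block size; mirrors RISCV::RVVBitsPerBlock
constexpr unsigned vlmax(unsigned VectorBits, unsigned EltSize, unsigned MinSize) {
  return (VectorBits / EltSize) * MinSize / RVVBitsPerBlock;
}
static_assert(vlmax(/*VLEN=*/128, /*SEW=*/32, /*MinSize=*/64) == 4,
              "an LMUL=1 register of i32 elements holds 4 lanes at VLEN=128");
static_assert(vlmax(/*VLEN=*/128, /*SEW=*/32, /*MinSize=*/256) == 16,
              "LMUL=4 quadruples the element count");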
This class represents an analyzed expression in the program.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of elements.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
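For orientation, a small sketch of how the fixed, scalable, and generic factories relate; buildVectorTypes is a hypothetical helper, not part of this file:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// <4 x i64> has a compile-time element count; <vscale x 4 x i64> scales with
// the runtime vector length. VectorType::get with an ElementCount can produce
// either flavour.
void buildVectorTypes(LLVMContext &Ctx) {
  Type *I64 = Type::getInt64Ty(Ctx);
  auto *Fixed = FixedVectorType::get(I64, 4);                          // <4 x i64>
  auto *Scalable = ScalableVectorType::get(I64, 4);                    // <vscale x 4 x i64>
  auto *Generic = VectorType::get(I64, ElementCount::getScalable(4));  // same as Scalable
  (void)Fixed; (void)Scalable; (void)Generic;
}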
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of RHS.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types and value types.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
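For readers unfamiliar with these matchers, a short sketch of how they compose; looksLikeSplatShuffle is a hypothetical helper, not something defined in this file:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Sketch: recognize the common splat idiom
//   %ins   = insertelement poison, %x, i64 0
//   %splat = shufflevector %ins, poison, <mask>
// m_Shuffle accepts any mask, m_InsertElt matches the insert, and m_ZeroInt
// pins the insert index to lane 0; Scalar is bound to %x on success.
static bool looksLikeSplatShuffle(Value *V, Value *&Scalar) {
  return match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(Scalar), m_ZeroInt()),
                            m_Value()));
}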
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
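That check is what the 12-bit immediate tests in this cost code reduce to; a small sketch:

#include "llvm/Support/MathExtras.h"
// Sketch: isInt<12> is the "fits in a RISC-V I-type immediate" test.
static_assert(llvm::isInt<12>(2047), "largest value an addi immediate can encode");
static_assert(!llvm::isInt<12>(2048), "needs more than one instruction to materialize");
static_assert(llvm::isInt<12>(-2048), "smallest 12-bit signed value");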
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto adjacent_find(R &&Range)
Provide wrappers to std::adjacent_find which finds the first pair of adjacent elements that are equal...
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
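Helpers like this one and countr_zero underpin the shifted-mask checks in the immediate-cost logic above; a small sketch of what they report, with checkShiftAmount as a hypothetical wrapper:

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

// 0x0ff0 is eight contiguous ones shifted left by four, so it is a shifted mask.
static_assert(llvm::isShiftedMask_64(0x0ff0), "single contiguous run of ones");
static_assert(!llvm::isShiftedMask_64(0x0f0f), "two separate runs do not qualify");

void checkShiftAmount() {
  // countr_zero recovers the shift amount of the run (four here).
  assert(llvm::countr_zero(0x0ff0u) == 4);
}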
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
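Both the floor and the ceiling variants appear above; a tiny sketch of the difference, with log2Examples as a hypothetical wrapper:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void log2Examples() {
  assert(llvm::Log2_32(33) == 5);      // floor: 2^5 = 32 <= 33
  assert(llvm::Log2_32_Ceil(33) == 6); // ceiling: 2^6 = 64 >= 33
  static_assert(llvm::isPowerOf2_32(32) && !llvm::isPowerOf2_32(33),
                "isPowerOf2_32 distinguishes the two cases at compile time");
}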
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a comparator C.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
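A sketch of the masks these two builders produce, following their documented behaviour; maskExamples is a hypothetical wrapper:

#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

void maskExamples() {
  // Picks every second element of the source: <0, 2, 4, 6>
  SmallVector<int, 16> Stride = createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
  // Interleaves two 4-element vectors lane by lane: <0, 4, 1, 5, 2, 6, 3, 7>
  SmallVector<int, 16> Interleave = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  (void)Stride; (void)Interleave;
}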
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
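SignExtend64 reinterprets the low B bits of a value as a signed quantity, which is how 12-bit immediate fields are read back; a small sketch:

#include "llvm/Support/MathExtras.h"
static_assert(llvm::SignExtend64<12>(0xFFF) == -1, "an all-ones 12-bit field is -1");
static_assert(llvm::SignExtend64<12>(0x7FF) == 2047, "sign bit clear stays positive");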
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Information about a load/store intrinsic defined by the target.