18 #include "llvm/IR/IntrinsicsRISCV.h"
25 #define DEBUG_TYPE "riscvtti"
28 "riscv-v-register-bit-width-lmul",
30 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
31 "by autovectorized code. Fractional LMULs are not supported."),
37 "Overrides result used for getMaximumVF query which is used "
38 "exclusively by SLP vectorizer."),
43 cl::desc("Set the lower bound of a trip count to decide on "
44 "vectorization while tail-folding."),
53 size_t NumInstr = OpCodes.size();
58 return LMULCost * NumInstr;
60 for (auto Op : OpCodes) {
62 case RISCV::VRGATHER_VI:
65 case RISCV::VRGATHER_VV:
68 case RISCV::VSLIDEUP_VI:
69 case RISCV::VSLIDEDOWN_VI:
72 case RISCV::VSLIDEUP_VX:
73 case RISCV::VSLIDEDOWN_VX:
76 case RISCV::VREDMAX_VS:
77 case RISCV::VREDMIN_VS:
78 case RISCV::VREDMAXU_VS:
79 case RISCV::VREDMINU_VS:
80 case RISCV::VREDSUM_VS:
81 case RISCV::VREDAND_VS:
82 case RISCV::VREDOR_VS:
83 case RISCV::VREDXOR_VS:
84 case RISCV::VFREDMAX_VS:
85 case RISCV::VFREDMIN_VS:
86 case RISCV::VFREDUSUM_VS: {
93 case RISCV::VFREDOSUM_VS: {
102 case RISCV::VFMV_F_S:
103 case RISCV::VFMV_S_F:
105 case RISCV::VMXOR_MM:
106 case RISCV::VMAND_MM:
107 case RISCV::VMANDN_MM:
108 case RISCV::VMNAND_MM:
110 case RISCV::VFIRST_M:
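// Editorial note (not part of the original source): the switch above assigns a
// per-opcode weight used by getRISCVInstructionCost. Hedged reading of the
// surrounding fragments: most vector ops cost one LMUL-scaled unit, while
// vrgather, register-operand slides, and the vred*/vfred* reduction opcodes
// are given larger, type-dependent costs; the exact constants live in the
// elided case bodies and may vary by subtarget.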
125 assert(Ty->isIntegerTy() &&
126 "getIntImmCost can only estimate cost of materialising integers");
149 if (!BO || !BO->hasOneUse())
152 if (BO->getOpcode() != Instruction::Shl)
163 if (ShAmt == Trailing)
180 if (!Cmp || !Cmp->isEquality())
196 if ((CmpC & Mask) != CmpC)
203 return NewCmpC >= -2048 && NewCmpC <= 2048;
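// Editorial note: hedged reading of the checks above. A compare constant is
// only folded into an existing single-use shl when the shift amount matches
// the mask's trailing-zero count, the constant is covered by the mask, and the
// adjusted constant still fits the small signed-immediate range checked on the
// last line, so no extra constant materialization is needed.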
210 assert(Ty->isIntegerTy() &&
211 "getIntImmCost can only estimate cost of materialising integers");
219 bool Takes12BitImm = false;
220 unsigned ImmArgIdx = ~0U;
223 case Instruction::GetElementPtr:
228 case Instruction::Store: {
233 if (Idx == 1 || !Inst)
238 if (!getTLI()->allowsMemoryAccessForAlignment(
246 case Instruction::Load:
249 case Instruction::And:
251 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
254 if (Imm == UINT64_C(0xffffffff) &&
255 ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
258 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
260 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
263 if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
266 Takes12BitImm = true;
268 case Instruction::Add:
269 Takes12BitImm = true;
271 case Instruction::Or:
272 case Instruction::Xor:
274 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
276 Takes12BitImm = true;
278 case Instruction::Mul:
280 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
283 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
286 Takes12BitImm = true;
288 case Instruction::Sub:
289 case Instruction::Shl:
290 case Instruction::LShr:
291 case Instruction::AShr:
292 Takes12BitImm = true;
303 if (Imm.getSignificantBits() <= 64 &&
326 return ST->hasVInstructions();
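// Editorial note: the fragment below belongs to the partial-reduction cost
// hook. Hedged summary of the visible conditions: a vqdot.vv based cost is
// only returned when Zvqdotq is available with ELEN >= 64, the outer opcode is
// an add, the inner binary op is a multiply, and both multiply inputs have the
// same 8-bit integer type.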
336 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
343 if (!ST->hasStdExtZvqdotq() || ST->getELen() < 64 ||
344 Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
345 InputTypeA != InputTypeB || !InputTypeA->isIntegerTy(8) ||
353 getRISCVInstructionCost(RISCV::VQDOT_VV, LT.second, CostKind);
360 switch (II->getIntrinsicID()) {
364 case Intrinsic::vector_reduce_mul:
365 case Intrinsic::vector_reduce_fmul:
371 if (ST->hasVInstructions())
377 if (ST->hasVInstructions())
378 if (unsigned MinVLen = ST->getRealMinVLen();
393 ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
396 (ST->hasVInstructions() &&
406 RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,
416 unsigned Size = Mask.size();
419 for (unsigned I = 0; I != Size; ++I) {
420 if (static_cast<unsigned>(Mask[I]) == I)
426 for (unsigned J = I + 1; J != Size; ++J)
428 if (static_cast<unsigned>(Mask[J]) != J % I)
456 "Expected fixed vector type and non-empty mask");
459 unsigned NumOfDests = divideCeil(Mask.size(), LegalNumElts);
463 if (NumOfDests <= 1 ||
465 Tp->getElementType()->getPrimitiveSizeInBits() ||
466 LegalNumElts >= Tp->getElementCount().getFixedValue())
469 unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
472 unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
476 unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
477 unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
478 unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
480 assert(NormalizedVF >= Mask.size() &&
481 "Normalized mask expected to be not shorter than original mask.");
486 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
487 [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
490 if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
493 Cost += TTI.getShuffleCost(
496 SingleOpTy, RegMask, CostKind, 0, nullptr);
498 [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
499 Cost += TTI.getShuffleCost(
502 SingleOpTy, RegMask, CostKind, 0, nullptr);
525 if (!VLen || Mask.empty())
529 LegalVT = TTI.getTypeLegalizationCost(
535 if (NumOfDests <= 1 ||
537 Tp->getElementType()->getPrimitiveSizeInBits() ||
541 unsigned VecTySize = TTI.getDataLayout().getTypeStoreSize(Tp);
544 unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
550 unsigned NormalizedVF =
555 assert(NormalizedVF >= Mask.size() &&
556 "Normalized mask expected to be not shorter than original mask.");
562 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
563 [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
566 if (!ReusedSingleSrcShuffles.insert(std::make_pair(RegMask, SrcReg))
571 SingleOpTy, RegMask, CostKind, 0, nullptr);
573 [&](ArrayRef<int> RegMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
575 SingleOpTy, RegMask, CostKind, 0, nullptr);
582 if ((NumOfDestRegs > 2 && NumShuffles <= static_cast<int>(NumOfDestRegs)) ||
583 (NumOfDestRegs <= 2 && NumShuffles < 4))
598 if (!LT.second.isFixedLengthVector())
606 auto GetSlideOpcode = [&](int SlideAmt) {
608 bool IsVI = isUInt<5>(std::abs(SlideAmt));
610 return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
611 return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;
614 std::array<std::pair<int, int>, 2> SrcInfo;
618 if (SrcInfo[1].second == 0)
622 if (SrcInfo[0].second != 0) {
623 unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
624 FirstSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
627 if (SrcInfo[1].first == -1)
628 return FirstSlideCost;
631 if (SrcInfo[1].second != 0) {
632 unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
633 SecondSlideCost = getRISCVInstructionCost(Opcode, LT.second, CostKind);
636 getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
643 return FirstSlideCost + SecondSlideCost + MaskCost;
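// Editorial note: hedged summary of the slide-based shuffle costing above. A
// two-source shuffle that decomposes into per-source slides is modeled as an
// optional vslideup/vslidedown per source plus a vmerge.vvm to blend them;
// the vslide*.vi form is costed when the slide amount fits a 5-bit unsigned
// immediate, otherwise the vslide*.vx form is used.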
654 "Expected the Mask to match the return size if given");
656 "Expected the same scalar types");
665 FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
667 *this, LT.second, ST->getRealVLen(),
669 if (VRegSplittingCost.isValid())
670 return VRegSplittingCost;
675 if (Mask.size() >= 2) {
676 MVT EltTp = LT.second.getVectorElementType();
687 return 2 * LT.first * TLI->getLMULCost(LT.second);
689 if (Mask[0] == 0 || Mask[0] == 1) {
693 if (equal(DeinterleaveMask, Mask))
694 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
699 if (LT.second.getScalarSizeInBits() != 1 &&
702 unsigned NumSlides = Log2_32(Mask.size() / SubVectorSize);
704 for (unsigned I = 0; I != NumSlides; ++I) {
705 unsigned InsertIndex = SubVectorSize * (1 << I);
710 std::pair<InstructionCost, MVT> DestLT =
715 Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
729 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
730 LT.second.getVectorNumElements() <= 256)) {
735 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
749 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
750 LT.second.getVectorNumElements() <= 256)) {
751 auto &C = SrcTy->getContext();
752 auto EC = SrcTy->getElementCount();
757 return 2 * IndexCost +
758 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
777 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
805 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
806 if (std::optional<unsigned> VLen = ST->getRealVLen();
807 VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
808 SubLT.second.getSizeInBits() <= *VLen)
816 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
823 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
835 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
840 Instruction::InsertElement);
841 if (LT.second.getScalarSizeInBits() == 1) {
849 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
862 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
863 RISCV::VMV_X_S, RISCV::VMV_V_X,
872 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
878 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
884 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
885 if (Index >= 0 && Index < 32)
886 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
887 else if (Index < 0 && Index > -32)
888 Opcodes[1] = RISCV::VSLIDEUP_VI;
889 return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
893 if (!LT.second.isVector())
899 if (SrcTy->getElementType()->isIntegerTy(1)) {
911 MVT ContainerVT = LT.second;
912 if (LT.second.isFixedLengthVector())
913 ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
915 if (ContainerVT.bitsLE(M1VT)) {
925 if (LT.second.isFixedLengthVector())
927 LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
928 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
929 if (LT.second.isFixedLengthVector() &&
930 isInt<5>(LT.second.getVectorNumElements() - 1))
931 Opcodes[1] = RISCV::VRSUB_VI;
933 getRISCVInstructionCost(Opcodes, LT.second, CostKind);
934 return LT.first * (LenCost + GatherCost);
941 unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
943 getRISCVInstructionCost(M1Opcodes, M1VT, CostKind) + 3;
947 getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT, CostKind) * Ratio;
949 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second, CostKind);
950 return FixedCost + LT.first * (GatherCost + SlideCost);
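// Editorial note: hedged reading of the reverse-shuffle costing above. Types
// whose container fits in a single M1 register are costed as vid.v + vrsub
// (plus possibly one instruction to materialize the length) feeding a
// vrgather.vv; larger types repeat the M1-sized gather per register group and
// add a vslidedown.vx to stitch the pieces, which is where the Ratio factor
// and the extra SlideCost come from.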
984 Ty, DemandedElts, Insert, Extract, CostKind);
986 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
987 if (Ty->getScalarSizeInBits() == 1) {
997 assert(LT.second.isFixedLengthVector());
998 MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
1002 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
1013 unsigned Opcode = MICA.getID() == Intrinsic::masked_load ? Instruction::Load
1014 : Instruction::Store;
1029 bool UseMaskForCond, bool UseMaskForGaps) const {
1035 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
1039 if (LT.second.isVector()) {
1042 VTy->getElementCount().divideCoefficientBy(Factor));
1043 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
1044 TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
1049 if (ST->hasOptimizedSegmentLoadStore(Factor)) {
1052 MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
1053 Cost += Factor * TLI->getLMULCost(SubVecVT);
1054 return LT.first * Cost;
1061 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
1062 unsigned NumLoads = getEstimatedVLFor(VTy);
1063 return NumLoads * MemOpCost;
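// Editorial note: hedged summary of the interleaved-access fast path above.
// When the factor is supported and the sub-vector type is legal for segment
// access, the cost is the legalized memory-op cost, and subtargets with
// optimized segment load/store additionally pay roughly Factor LMUL-scaled
// units; the NumLoads * MemOpCost fallback models one memory op per estimated
// VL element.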
1076 unsigned VF = FVTy->getNumElements() / Factor;
1083 if (Opcode == Instruction::Load) {
1085 for (unsigned Index : Indices) {
1089 Mask.resize(VF * Factor, -1);
1093 Cost += ShuffleCost;
1111 UseMaskForCond, UseMaskForGaps);
1113 assert(Opcode == Instruction::Store && "Opcode must be a store");
1120 return MemCost + ShuffleCost;
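// Editorial note: on the shuffle-lowered path, an interleaved load is costed
// as the wide memory op plus one de-interleaving shuffle per requested index,
// and an interleaved store as the wide memory op plus one interleaving
// shuffle (hedged reading; the elided lines build the shuffle masks that feed
// ShuffleCost).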
1124 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1130 if ((Opcode == Instruction::Load &&
1132 (Opcode == Instruction::Store &&
1143 {TTI::OK_AnyValue, TTI::OP_None}, I);
1144 unsigned NumLoads = getEstimatedVLFor(&VTy);
1145 return NumLoads * MemOpCost;
1149 unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
1151 bool IsLegal = (Opcode == Instruction::Store &&
1153 (Opcode == Instruction::Load &&
1178 if (Opcode == Instruction::Store)
1179 Opcodes.append({RISCV::VCOMPRESS_VM});
1181 Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
1183 LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1187 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1189 if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
1191 (Opcode != Instruction::Load && Opcode != Instruction::Store))
1204 {TTI::OK_AnyValue, TTI::OP_None}, I);
1205 unsigned NumLoads = getEstimatedVLFor(&VTy);
1206 return NumLoads * MemOpCost;
1216 for (auto *Ty : Tys) {
1217 if (!Ty->isVectorTy())
1231 {Intrinsic::floor, MVT::f32, 9},
1232 {Intrinsic::floor, MVT::f64, 9},
1233 {Intrinsic::ceil, MVT::f32, 9},
1234 {Intrinsic::ceil, MVT::f64, 9},
1235 {Intrinsic::trunc, MVT::f32, 7},
1236 {Intrinsic::trunc, MVT::f64, 7},
1237 {Intrinsic::round, MVT::f32, 9},
1238 {Intrinsic::round, MVT::f64, 9},
1239 {Intrinsic::roundeven, MVT::f32, 9},
1240 {Intrinsic::roundeven, MVT::f64, 9},
1241 {Intrinsic::rint, MVT::f32, 7},
1242 {Intrinsic::rint, MVT::f64, 7},
1243 {Intrinsic::nearbyint, MVT::f32, 9},
1244 {Intrinsic::nearbyint, MVT::f64, 9},
1245 {Intrinsic::bswap, MVT::i16, 3},
1246 {Intrinsic::bswap, MVT::i32, 12},
1247 {Intrinsic::bswap, MVT::i64, 31},
1248 {Intrinsic::vp_bswap, MVT::i16, 3},
1249 {Intrinsic::vp_bswap, MVT::i32, 12},
1250 {Intrinsic::vp_bswap, MVT::i64, 31},
1251 {Intrinsic::vp_fshl, MVT::i8, 7},
1252 {Intrinsic::vp_fshl, MVT::i16, 7},
1253 {Intrinsic::vp_fshl, MVT::i32, 7},
1254 {Intrinsic::vp_fshl, MVT::i64, 7},
1255 {Intrinsic::vp_fshr, MVT::i8, 7},
1256 {Intrinsic::vp_fshr, MVT::i16, 7},
1257 {Intrinsic::vp_fshr, MVT::i32, 7},
1258 {Intrinsic::vp_fshr, MVT::i64, 7},
1259 {Intrinsic::bitreverse, MVT::i8, 17},
1260 {Intrinsic::bitreverse, MVT::i16, 24},
1261 {Intrinsic::bitreverse, MVT::i32, 33},
1262 {Intrinsic::bitreverse, MVT::i64, 52},
1263 {Intrinsic::vp_bitreverse, MVT::i8, 17},
1264 {Intrinsic::vp_bitreverse, MVT::i16, 24},
1265 {Intrinsic::vp_bitreverse, MVT::i32, 33},
1266 {Intrinsic::vp_bitreverse, MVT::i64, 52},
1267 {Intrinsic::ctpop, MVT::i8, 12},
1268 {Intrinsic::ctpop, MVT::i16, 19},
1269 {Intrinsic::ctpop, MVT::i32, 20},
1270 {Intrinsic::ctpop, MVT::i64, 21},
1271 {Intrinsic::ctlz, MVT::i8, 19},
1272 {Intrinsic::ctlz, MVT::i16, 28},
1273 {Intrinsic::ctlz, MVT::i32, 31},
1274 {Intrinsic::ctlz, MVT::i64, 35},
1275 {Intrinsic::cttz, MVT::i8, 16},
1276 {Intrinsic::cttz, MVT::i16, 23},
1277 {Intrinsic::cttz, MVT::i32, 24},
1278 {Intrinsic::cttz, MVT::i64, 25},
1279 {Intrinsic::vp_ctpop, MVT::i8, 12},
1280 {Intrinsic::vp_ctpop, MVT::i16, 19},
1281 {Intrinsic::vp_ctpop, MVT::i32, 20},
1282 {Intrinsic::vp_ctpop, MVT::i64, 21},
1283 {Intrinsic::vp_ctlz, MVT::i8, 19},
1284 {Intrinsic::vp_ctlz, MVT::i16, 28},
1285 {Intrinsic::vp_ctlz, MVT::i32, 31},
1286 {Intrinsic::vp_ctlz, MVT::i64, 35},
1287 {Intrinsic::vp_cttz, MVT::i8, 16},
1288 {Intrinsic::vp_cttz, MVT::i16, 23},
1289 {Intrinsic::vp_cttz, MVT::i32, 24},
1290 {Intrinsic::vp_cttz, MVT::i64, 25},
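// Editorial note: the entries above form a per-element instruction-count cost
// table for vector intrinsics that RVV expands into fixed sequences (FP
// rounding ops, bswap/bitreverse, ctpop/ctlz/cttz and their VP variants),
// keyed on the scalar MVT. The lookup further below scales the matched
// Entry->Cost by LT.first, the number of legalized register groups (hedged
// reading; the table's declaration line is elided from this fragment).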
1297 switch (ICA.getID()) {
1298 case Intrinsic::lrint:
1299 case Intrinsic::llrint:
1300 case Intrinsic::lround:
1301 case Intrinsic::llround: {
1305 if (ST->hasVInstructions() && LT.second.isVector()) {
1307 unsigned SrcEltSz = DL.getTypeSizeInBits(SrcTy->getScalarType());
1308 unsigned DstEltSz = DL.getTypeSizeInBits(RetTy->getScalarType());
1309 if (LT.second.getVectorElementType() == MVT::bf16) {
1310 if (!ST->hasVInstructionsBF16Minimal())
1313 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
1315 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
1316 } else if (LT.second.getVectorElementType() == MVT::f16 &&
1317 !ST->hasVInstructionsF16()) {
1318 if (!ST->hasVInstructionsF16Minimal())
1321 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
1323 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
1325 } else if (SrcEltSz > DstEltSz) {
1326 Ops = {RISCV::VFNCVT_X_F_W};
1327 } else if (SrcEltSz < DstEltSz) {
1328 Ops = {RISCV::VFWCVT_X_F_V};
1330 Ops = {RISCV::VFCVT_X_F_V};
1335 if (SrcEltSz > DstEltSz)
1336 return SrcLT.first *
1337 getRISCVInstructionCost(Ops, SrcLT.second, CostKind);
1338 return LT.first * getRISCVInstructionCost(Ops, LT.second, CostKind);
1342 case Intrinsic::ceil:
1343 case Intrinsic::floor:
1344 case Intrinsic::trunc:
1345 case Intrinsic::rint:
1346 case Intrinsic::round:
1347 case Intrinsic::roundeven: {
1350 if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
1351 return LT.first * 8;
1354 case Intrinsic::umin:
1355 case Intrinsic::umax:
1356 case Intrinsic::smin:
1357 case Intrinsic::smax: {
1359 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
1362 if (ST->hasVInstructions() && LT.second.isVector()) {
1364 switch (ICA.getID()) {
1365 case Intrinsic::umin:
1366 Op = RISCV::VMINU_VV;
1368 case Intrinsic::umax:
1369 Op = RISCV::VMAXU_VV;
1371 case Intrinsic::smin:
1372 Op = RISCV::VMIN_VV;
1374 case Intrinsic::smax:
1375 Op = RISCV::VMAX_VV;
1378 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1382 case Intrinsic::sadd_sat:
1383 case Intrinsic::ssub_sat:
1384 case Intrinsic::uadd_sat:
1385 case Intrinsic::usub_sat: {
1387 if (ST->hasVInstructions() && LT.second.isVector()) {
1389 switch (ICA.getID()) {
1390 case Intrinsic::sadd_sat:
1391 Op = RISCV::VSADD_VV;
1393 case Intrinsic::ssub_sat:
1394 Op = RISCV::VSSUBU_VV;
1396 case Intrinsic::uadd_sat:
1397 Op = RISCV::VSADDU_VV;
1399 case Intrinsic::usub_sat:
1400 Op = RISCV::VSSUBU_VV;
1403 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1407 case Intrinsic::fma:
1408 case Intrinsic::fmuladd: {
1411 if (ST->hasVInstructions() && LT.second.isVector())
1413 getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second, CostKind);
1416 case Intrinsic::fabs: {
1418 if (ST->hasVInstructions() && LT.second.isVector()) {
1424 if (LT.second.getVectorElementType() == MVT::bf16 ||
1425 (LT.second.getVectorElementType() == MVT::f16 &&
1426 !ST->hasVInstructionsF16()))
1427 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1432 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second, CostKind);
1436 case Intrinsic::sqrt: {
1438 if (ST->hasVInstructions() && LT.second.isVector()) {
1441 MVT ConvType = LT.second;
1442 MVT FsqrtType = LT.second;
1445 if (LT.second.getVectorElementType() == MVT::bf16) {
1446 if (LT.second == MVT::nxv32bf16) {
1447 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1448 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1449 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1450 ConvType = MVT::nxv16f16;
1451 FsqrtType = MVT::nxv16f32;
1453 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1454 FsqrtOp = {RISCV::VFSQRT_V};
1455 FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
1457 } else if (LT.second.getVectorElementType() == MVT::f16 &&
1458 !ST->hasVInstructionsF16()) {
1459 if (LT.second == MVT::nxv32f16) {
1460 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1461 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1462 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1463 ConvType = MVT::nxv16f16;
1464 FsqrtType = MVT::nxv16f32;
1466 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1467 FsqrtOp = {RISCV::VFSQRT_V};
1468 FsqrtType = TLI->getTypeToPromoteTo(ISD::FSQRT, FsqrtType);
1471 FsqrtOp = {RISCV::VFSQRT_V};
1474 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType, CostKind) +
1475 getRISCVInstructionCost(ConvOp, ConvType, CostKind));
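// Editorial note: hedged summary of the sqrt costing above. bf16 and (without
// full f16 vector support) f16 vectors have no native vfsqrt.v, so the model
// charges widening converts to f32, the f32 vfsqrt.v, and the narrowing
// converts back; the nxv32 element-count cases use doubled convert/sqrt
// sequences on nxv16 halves because the widened type would not fit a single
// register group.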
1479 case Intrinsic::cttz:
1480 case Intrinsic::ctlz:
1481 case Intrinsic::ctpop: {
1483 if (ST->hasStdExtZvbb() && LT.second.isVector()) {
1485 switch (ICA.getID()) {
1486 case Intrinsic::cttz:
1489 case Intrinsic::ctlz:
1492 case Intrinsic::ctpop:
1493 Op = RISCV::VCPOP_V;
1496 return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
1500 case Intrinsic::abs: {
1502 if (ST->hasVInstructions() && LT.second.isVector()) {
1506 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1511 case Intrinsic::get_active_lane_mask: {
1512 if (ST->hasVInstructions()) {
1521 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1527 case Intrinsic::stepvector: {
1531 if (ST->hasVInstructions())
1532 return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
1534 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
1535 return 1 + (LT.first - 1);
1537 case Intrinsic::experimental_cttz_elts: {
1539 EVT ArgType = TLI->getValueType(DL, ArgTy, true);
1540 if (getTLI()->shouldExpandCttzElements(ArgType))
1557 case Intrinsic::experimental_vp_splat: {
1560 if (!ST->hasVInstructions() || LT.second.getScalarType() == MVT::i1)
1562 return LT.first * getRISCVInstructionCost(LT.second.isFloatingPoint()
1567 case Intrinsic::experimental_vp_splice: {
1575 case Intrinsic::fptoui_sat:
1576 case Intrinsic::fptosi_sat: {
1578 bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
1583 if (!SrcTy->isVectorTy())
1586 if (!SrcLT.first.isValid() || !DstLT.first.isValid())
1605 if (ST->hasVInstructions() && RetTy->isVectorTy()) {
1607 LT.second.isVector()) {
1608 MVT EltTy = LT.second.getVectorElementType();
1610 ICA.getID(), EltTy))
1611 return LT.first * Entry->Cost;
1624 if (ST->hasVInstructions() && PtrTy->isVectorTy())
1642 if (ST->enablePExtCodeGen() &&
1650 if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
1651 Dst->getScalarSizeInBits() > ST->getELen())
1654 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1669 if (Src->getScalarSizeInBits() == 1) {
1674 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second, CostKind) +
1675 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1681 if (Dst->getScalarSizeInBits() == 1) {
1687 return SrcLT.first *
1688 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1700 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1701 !SrcLT.first.isValid() || !DstLT.first.isValid() ||
1703 SrcLT.second.getSizeInBits()) ||
1705 DstLT.second.getSizeInBits()) ||
1706 SrcLT.first > 1 || DstLT.first > 1)
1710 assert((SrcLT.first == 1) && (DstLT.first == 1) && "Illegal type");
1712 int PowDiff = (int)Log2_32(DstLT.second.getScalarSizeInBits()) -
1713 (int)Log2_32(SrcLT.second.getScalarSizeInBits());
1717 if ((PowDiff < 1) || (PowDiff > 3))
1719 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1720 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1723 return getRISCVInstructionCost(Op, DstLT.second, CostKind);
1726 case ISD::FP_EXTEND:
1729 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1730 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1733 : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
1734 : RISCV::VFNCVT_F_F_W;
1736 for (; SrcEltSize != DstEltSize;) {
1740 MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
1742 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1750 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1752 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1754 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1755 unsigned SrcEltSize = Src->getScalarSizeInBits();
1756 unsigned DstEltSize = Dst->getScalarSizeInBits();
1758 if ((SrcEltSize == 16) &&
1759 (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
1765 std::pair<InstructionCost, MVT> VecF32LT =
1768 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1773 if (DstEltSize == SrcEltSize)
1774 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1775 else if (DstEltSize > SrcEltSize)
1776 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1781 MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
1782 Cost += getRISCVInstructionCost(FNCVT, VecVT, CostKind);
1783 if ((SrcEltSize / 2) > DstEltSize) {
1794 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1795 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1796 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1797 unsigned SrcEltSize = Src->getScalarSizeInBits();
1798 unsigned DstEltSize = Dst->getScalarSizeInBits();
1801 if ((DstEltSize == 16) &&
1802 (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
1808 std::pair<InstructionCost, MVT> VecF32LT =
1811 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1816 if (DstEltSize == SrcEltSize)
1817 Cost += getRISCVInstructionCost(FCVT, DstLT.second, CostKind);
1818 else if (DstEltSize > SrcEltSize) {
1819 if ((DstEltSize / 2) > SrcEltSize) {
1823 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1826 Cost += getRISCVInstructionCost(FWCVT, DstLT.second, CostKind);
1828 Cost += getRISCVInstructionCost(FNCVT, DstLT.second, CostKind);
1835 unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) const {
1837 const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
1838 const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
1853 if (Ty->getScalarSizeInBits() > ST->getELen())
1857 if (Ty->getElementType()->isIntegerTy(1)) {
1861 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
1867 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
1871 case Intrinsic::maximum:
1873 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1875 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
1890 case Intrinsic::minimum:
1892 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1894 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
1900 const unsigned EltTyBits = DL.getTypeSizeInBits(DstTy);
1909 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
1918 case Intrinsic::smax:
1919 SplitOp = RISCV::VMAX_VV;
1920 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
1922 case Intrinsic::smin:
1923 SplitOp = RISCV::VMIN_VV;
1924 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
1926 case Intrinsic::umax:
1927 SplitOp = RISCV::VMAXU_VV;
1928 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1930 case Intrinsic::umin:
1931 SplitOp = RISCV::VMINU_VV;
1932 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
1934 case Intrinsic::maxnum:
1935 SplitOp = RISCV::VFMAX_VV;
1936 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
1938 case Intrinsic::minnum:
1939 SplitOp = RISCV::VFMIN_VV;
1940 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
1945 (LT.first > 1) ? (LT.first - 1) *
1946 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
1948 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
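// Editorial note: hedged reading of the min/max reduction cost model above.
// Each reduction is a single vred*/vfred* into element 0 plus a vmv.x.s or
// vfmv.f.s to read the scalar out; when legalization splits the value into
// LT.first register groups, LT.first - 1 pairwise SplitOp steps (vmax/vmin or
// vfmax/vfmin) are charged first to combine the groups.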
1953 std::optional<FastMathFlags> FMF,
1959 if (Ty->getScalarSizeInBits() > ST->getELen())
1962 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1970 Type *ElementTy = Ty->getElementType();
1975 if (LT.second == MVT::v1i1)
1976 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
1994 return ((LT.first > 2) ? (LT.first - 2) : 0) *
1995 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
1996 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
1997 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
2006 return (LT.first - 1) *
2007 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind) +
2008 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) + 1;
2016 return (LT.first - 1) *
2017 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second, CostKind) +
2018 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
2031 SplitOp = RISCV::VADD_VV;
2032 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
2035 SplitOp = RISCV::VOR_VV;
2036 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
2039 SplitOp = RISCV::VXOR_VV;
2040 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
2043 SplitOp = RISCV::VAND_VV;
2044 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
2048 if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
2049 LT.second.getScalarType() == MVT::bf16)
2053 for (unsigned i = 0; i < LT.first.getValue(); i++)
2056 return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
2058 SplitOp = RISCV::VFADD_VV;
2059 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
2064 (LT.first > 1) ? (LT.first - 1) *
2065 getRISCVInstructionCost(SplitOp, LT.second, CostKind)
2067 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
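// Editorial note: arithmetic reductions follow the same shape as the min/max
// case above: i1 reductions are folded to vcpop.m/vfirst.m style mask code,
// while integer and FP reductions pay one vred*/vfred* plus a scalar move,
// with LT.first - 1 extra SplitOp steps when the legalized type spans several
// register groups (hedged summary of the visible fragments).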
2071 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
2082 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
2088 if (IsUnsigned && Opcode == Instruction::Add &&
2089 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
2093 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind);
2100 return (LT.first - 1) +
2107 assert(OpInfo.isConstant() && "non constant operand?");
2114 if (OpInfo.isUniform())
2120 return getConstantPoolLoadCost(Ty, CostKind);
2129 EVT VT = TLI->getValueType(DL, Src, true);
2131 if (VT == MVT::Other)
2136 if (Opcode == Instruction::Store && OpInfo.isConstant())
2151 if (Src->isVectorTy() && LT.second.isVector() &&
2153 LT.second.getSizeInBits()))
2163 if (ST->hasVInstructions() && LT.second.isVector() &&
2165 BaseCost *= TLI->getLMULCost(LT.second);
2166 return Cost + BaseCost;
2175 Op1Info, Op2Info, I);
2179 Op1Info, Op2Info, I);
2182 if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
2184 Op1Info, Op2Info, I);
2186 auto GetConstantMatCost =
2188 if (OpInfo.isUniform())
2193 return getConstantPoolLoadCost(ValTy, CostKind);
2198 ConstantMatCost += GetConstantMatCost(Op1Info);
2200 ConstantMatCost += GetConstantMatCost(Op2Info);
2203 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
2205 if (ValTy->getScalarSizeInBits() == 1) {
2209 return ConstantMatCost +
2211 getRISCVInstructionCost(
2212 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2216 return ConstantMatCost +
2217 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
2221 if (ValTy->getScalarSizeInBits() == 1) {
2227 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
2228 return ConstantMatCost +
2230 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
2232 LT.first * getRISCVInstructionCost(
2233 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2240 return ConstantMatCost +
2241 LT.first * getRISCVInstructionCost(
2242 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
2246 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
2250 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
2255 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
2260 return ConstantMatCost +
2261 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
2267 if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
2268 (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
2269 (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
2271 Op1Info, Op2Info, I);
2280 return ConstantMatCost +
2281 LT.first * getRISCVInstructionCost(
2282 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
2289 return ConstantMatCost +
2291 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
2300 return ConstantMatCost +
2302 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
2313 ValTy->isIntegerTy() && !I->user_empty()) {
2315 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
2316 U->getType()->isIntegerTy() &&
2317 !isa<ConstantData>(U->getOperand(1)) &&
2318 !isa<ConstantData>(U->getOperand(2));
2326 Op1Info, Op2Info, I);
2333 return Opcode == Instruction::PHI ? 0 : 1;
2342 const Value *Op1) const {
2352 if (Opcode != Instruction::ExtractElement &&
2353 Opcode != Instruction::InsertElement)
2360 if (!LT.second.isVector()) {
2369 Type *ElemTy = FixedVecTy->getElementType();
2370 auto NumElems = FixedVecTy->getNumElements();
2371 auto Align = DL.getPrefTypeAlign(ElemTy);
2376 return Opcode == Instruction::ExtractElement
2377 ? StoreCost * NumElems + LoadCost
2378 : (StoreCost + LoadCost) * NumElems + StoreCost;
2382 if (LT.second.isScalableVector() && !LT.first.isValid())
2390 if (Opcode == Instruction::ExtractElement) {
2396 return ExtendCost + ExtractCost;
2406 return ExtendCost + InsertCost + TruncCost;
2412 unsigned BaseCost = 1;
2414 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2419 if (LT.second.isFixedLengthVector()) {
2420 unsigned Width = LT.second.getVectorNumElements();
2421 Index = Index % Width;
2426 if (auto VLEN = ST->getRealVLen()) {
2427 unsigned EltSize = LT.second.getScalarSizeInBits();
2428 unsigned M1Max = *VLEN / EltSize;
2429 Index = Index % M1Max;
2435 else if (ST->hasVendorXRivosVisni() && isUInt<5>(Index) &&
2438 else if (Opcode == Instruction::InsertElement)
2446 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2447 LT.second.isScalableVector()))) {
2449 Align VecAlign = DL.getPrefTypeAlign(Val);
2450 Align SclAlign = DL.getPrefTypeAlign(ScalarType);
2455 if (Opcode == Instruction::ExtractElement)
2491 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2493 return BaseCost + SlideCost;
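// Editorial note: hedged summary of the insert/extract element costing above.
// Illegal fixed vectors are costed as going through the stack (a store/load
// per element plus the scalar access); for legal types the element is reached
// with slides (two for an insert at a non-trivial index, one for an extract),
// while i1 vectors and scalable out-of-range indices take the widened or
// memory-based paths shown in the surrounding fragments.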
2499 unsigned Index) const {
2508 assert(Index < EC.getKnownMinValue() && "Unexpected reverse index");
2510 EC.getKnownMinValue() - 1 - Index, nullptr,
2537 if (!LT.second.isVector())
2543 unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
2545 if ((LT.second.getVectorElementType() == MVT::f16 ||
2546 LT.second.getVectorElementType() == MVT::bf16) &&
2547 TLI->getOperationAction(ISDOpcode, LT.second) ==
2549 MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
2553 CastCost += LT.first * Args.size() *
2561 LT.second = PromotedVT;
2564 auto getConstantMatCost =
2574 return getConstantPoolLoadCost(Ty, CostKind);
2580 ConstantMatCost += getConstantMatCost(0, Op1Info);
2582 ConstantMatCost += getConstantMatCost(1, Op2Info);
2585 switch (ISDOpcode) {
2588 Op = RISCV::VADD_VV;
2593 Op = RISCV::VSLL_VV;
2598 Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
2603 Op = RISCV::VMUL_VV;
2607 Op = RISCV::VDIV_VV;
2611 Op = RISCV::VREM_VV;
2615 Op = RISCV::VFADD_VV;
2618 Op = RISCV::VFMUL_VV;
2621 Op = RISCV::VFDIV_VV;
2624 Op = RISCV::VFSGNJN_VV;
2629 return CastCost + ConstantMatCost +
2638 if (Ty->isFPOrFPVectorTy())
2640 return CastCost + ConstantMatCost + LT.first * InstrCost;
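// Editorial note: hedged reading of the arithmetic cost model above. f16/bf16
// vectors that are only legal via promotion first pay cast costs to the
// promoted type; constant operands that cannot be folded add a constant-pool
// load cost; the switch then maps the ISD opcode to one representative RVV
// instruction (vadd.vv, vmul.vv, vfdiv.vv, ...) whose cost is scaled by
// LT.first, the number of legalized register groups.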
2663 if (Info.isSameBase() && V != Base) {
2664 if (GEP->hasAllConstantIndices())
2670 unsigned Stride = DL.getTypeStoreSize(AccessTy);
2671 if (Info.isUnitStride() &&
2677 GEP->getType()->getPointerAddressSpace()))
2680 {TTI::OK_AnyValue, TTI::OP_None},
2681 {TTI::OK_AnyValue, TTI::OP_None}, {});
2698 if (ST->enableDefaultUnroll())
2708 if (L->getHeader()->getParent()->hasOptSize())
2712 L->getExitingBlocks(ExitingBlocks);
2714 << "Blocks: " << L->getNumBlocks() << "\n"
2715 << "Exit blocks: " << ExitingBlocks.size() << "\n");
2719 if (ExitingBlocks.size() > 2)
2724 if (L->getNumBlocks() > 4)
2732 for (auto *BB : L->getBlocks()) {
2733 for (auto &I : *BB) {
2737 if (IsVectorized && I.getType()->isVectorTy())
2777 bool HasMask = false;
2780 bool IsWrite) -> int64_t {
2781 if (auto *TarExtTy =
2783 return TarExtTy->getIntParameter(0);
2789 case Intrinsic::riscv_vle_mask:
2790 case Intrinsic::riscv_vse_mask:
2791 case Intrinsic::riscv_vlseg2_mask:
2792 case Intrinsic::riscv_vlseg3_mask:
2793 case Intrinsic::riscv_vlseg4_mask:
2794 case Intrinsic::riscv_vlseg5_mask:
2795 case Intrinsic::riscv_vlseg6_mask:
2796 case Intrinsic::riscv_vlseg7_mask:
2797 case Intrinsic::riscv_vlseg8_mask:
2798 case Intrinsic::riscv_vsseg2_mask:
2799 case Intrinsic::riscv_vsseg3_mask:
2800 case Intrinsic::riscv_vsseg4_mask:
2801 case Intrinsic::riscv_vsseg5_mask:
2802 case Intrinsic::riscv_vsseg6_mask:
2803 case Intrinsic::riscv_vsseg7_mask:
2804 case Intrinsic::riscv_vsseg8_mask:
2807 case Intrinsic::riscv_vle:
2808 case Intrinsic::riscv_vse:
2809 case Intrinsic::riscv_vlseg2:
2810 case Intrinsic::riscv_vlseg3:
2811 case Intrinsic::riscv_vlseg4:
2812 case Intrinsic::riscv_vlseg5:
2813 case Intrinsic::riscv_vlseg6:
2814 case Intrinsic::riscv_vlseg7:
2815 case Intrinsic::riscv_vlseg8:
2816 case Intrinsic::riscv_vsseg2:
2817 case Intrinsic::riscv_vsseg3:
2818 case Intrinsic::riscv_vsseg4:
2819 case Intrinsic::riscv_vsseg5:
2820 case Intrinsic::riscv_vsseg6:
2821 case Intrinsic::riscv_vsseg7:
2822 case Intrinsic::riscv_vsseg8: {
2839 Ty = TarExtTy->getTypeParameter(0U);
2844 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
2845 unsigned VLIndex = RVVIInfo->VLOperand;
2846 unsigned PtrOperandNo = VLIndex - 1 - HasMask;
2854 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
2857 unsigned ElemSize = Ty->getScalarSizeInBits();
2861 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
2862 Alignment, Mask, EVL);
2865 case Intrinsic::riscv_vlse_mask:
2866 case Intrinsic::riscv_vsse_mask:
2867 case Intrinsic::riscv_vlsseg2_mask:
2868 case Intrinsic::riscv_vlsseg3_mask:
2869 case Intrinsic::riscv_vlsseg4_mask:
2870 case Intrinsic::riscv_vlsseg5_mask:
2871 case Intrinsic::riscv_vlsseg6_mask:
2872 case Intrinsic::riscv_vlsseg7_mask:
2873 case Intrinsic::riscv_vlsseg8_mask:
2874 case Intrinsic::riscv_vssseg2_mask:
2875 case Intrinsic::riscv_vssseg3_mask:
2876 case Intrinsic::riscv_vssseg4_mask:
2877 case Intrinsic::riscv_vssseg5_mask:
2878 case Intrinsic::riscv_vssseg6_mask:
2879 case Intrinsic::riscv_vssseg7_mask:
2880 case Intrinsic::riscv_vssseg8_mask:
2883 case Intrinsic::riscv_vlse:
2884 case Intrinsic::riscv_vsse:
2885 case Intrinsic::riscv_vlsseg2:
2886 case Intrinsic::riscv_vlsseg3:
2887 case Intrinsic::riscv_vlsseg4:
2888 case Intrinsic::riscv_vlsseg5:
2889 case Intrinsic::riscv_vlsseg6:
2890 case Intrinsic::riscv_vlsseg7:
2891 case Intrinsic::riscv_vlsseg8:
2892 case Intrinsic::riscv_vssseg2:
2893 case Intrinsic::riscv_vssseg3:
2894 case Intrinsic::riscv_vssseg4:
2895 case Intrinsic::riscv_vssseg5:
2896 case Intrinsic::riscv_vssseg6:
2897 case Intrinsic::riscv_vssseg7:
2898 case Intrinsic::riscv_vssseg8: {
2915 Ty = TarExtTy->getTypeParameter(0U);
2920 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
2921 unsigned VLIndex = RVVIInfo->VLOperand;
2922 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
2934 Alignment = Align(1);
2941 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
2944 unsigned ElemSize = Ty->getScalarSizeInBits();
2948 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
2949 Alignment, Mask, EVL, Stride);
2952 case Intrinsic::riscv_vloxei_mask:
2953 case Intrinsic::riscv_vluxei_mask:
2954 case Intrinsic::riscv_vsoxei_mask:
2955 case Intrinsic::riscv_vsuxei_mask:
2956 case Intrinsic::riscv_vloxseg2_mask:
2957 case Intrinsic::riscv_vloxseg3_mask:
2958 case Intrinsic::riscv_vloxseg4_mask:
2959 case Intrinsic::riscv_vloxseg5_mask:
2960 case Intrinsic::riscv_vloxseg6_mask:
2961 case Intrinsic::riscv_vloxseg7_mask:
2962 case Intrinsic::riscv_vloxseg8_mask:
2963 case Intrinsic::riscv_vluxseg2_mask:
2964 case Intrinsic::riscv_vluxseg3_mask:
2965 case Intrinsic::riscv_vluxseg4_mask:
2966 case Intrinsic::riscv_vluxseg5_mask:
2967 case Intrinsic::riscv_vluxseg6_mask:
2968 case Intrinsic::riscv_vluxseg7_mask:
2969 case Intrinsic::riscv_vluxseg8_mask:
2970 case Intrinsic::riscv_vsoxseg2_mask:
2971 case Intrinsic::riscv_vsoxseg3_mask:
2972 case Intrinsic::riscv_vsoxseg4_mask:
2973 case Intrinsic::riscv_vsoxseg5_mask:
2974 case Intrinsic::riscv_vsoxseg6_mask:
2975 case Intrinsic::riscv_vsoxseg7_mask:
2976 case Intrinsic::riscv_vsoxseg8_mask:
2977 case Intrinsic::riscv_vsuxseg2_mask:
2978 case Intrinsic::riscv_vsuxseg3_mask:
2979 case Intrinsic::riscv_vsuxseg4_mask:
2980 case Intrinsic::riscv_vsuxseg5_mask:
2981 case Intrinsic::riscv_vsuxseg6_mask:
2982 case Intrinsic::riscv_vsuxseg7_mask:
2983 case Intrinsic::riscv_vsuxseg8_mask:
2986 case Intrinsic::riscv_vloxei:
2987 case Intrinsic::riscv_vluxei:
2988 case Intrinsic::riscv_vsoxei:
2989 case Intrinsic::riscv_vsuxei:
2990 case Intrinsic::riscv_vloxseg2:
2991 case Intrinsic::riscv_vloxseg3:
2992 case Intrinsic::riscv_vloxseg4:
2993 case Intrinsic::riscv_vloxseg5:
2994 case Intrinsic::riscv_vloxseg6:
2995 case Intrinsic::riscv_vloxseg7:
2996 case Intrinsic::riscv_vloxseg8:
2997 case Intrinsic::riscv_vluxseg2:
2998 case Intrinsic::riscv_vluxseg3:
2999 case Intrinsic::riscv_vluxseg4:
3000 case Intrinsic::riscv_vluxseg5:
3001 case Intrinsic::riscv_vluxseg6:
3002 case Intrinsic::riscv_vluxseg7:
3003 case Intrinsic::riscv_vluxseg8:
3004 case Intrinsic::riscv_vsoxseg2:
3005 case Intrinsic::riscv_vsoxseg3:
3006 case Intrinsic::riscv_vsoxseg4:
3007 case Intrinsic::riscv_vsoxseg5:
3008 case Intrinsic::riscv_vsoxseg6:
3009 case Intrinsic::riscv_vsoxseg7:
3010 case Intrinsic::riscv_vsoxseg8:
3011 case Intrinsic::riscv_vsuxseg2:
3012 case Intrinsic::riscv_vsuxseg3:
3013 case Intrinsic::riscv_vsuxseg4:
3014 case Intrinsic::riscv_vsuxseg5:
3015 case Intrinsic::riscv_vsuxseg6:
3016 case Intrinsic::riscv_vsuxseg7:
3017 case Intrinsic::riscv_vsuxseg8: {
3034 Ty = TarExtTy->getTypeParameter(0U);
3039 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3040 unsigned VLIndex = RVVIInfo->VLOperand;
3041 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
3054 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3057 unsigned ElemSize = Ty->getScalarSizeInBits();
3062 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3063 Align(1), Mask, EVL,
3072 if (Ty->isVectorTy()) {
3075 if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
3081 if (Size.isScalable() && ST->hasVInstructions())
3084 if (ST->useRVVForFixedLengthVectors())
3104 return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
3112 return ST->enableUnalignedVectorMem();
3118 if (ST->hasVendorXCVmem() && !ST->is64Bit())
3140 Align Alignment) const {
3142 if (!VTy || VTy->isScalableTy())
3150 if (VTy->getElementType()->isIntegerTy(8))
3151 if (VTy->getElementCount().getFixedValue() > 256)
3152 return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
3153 ST->getMaxLMULForFixedLengthVectors();
3158 Align Alignment) const {
3160 if (!VTy || VTy->isScalableTy())
3174 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
3175 bool Considerable = false;
3176 AllowPromotionWithoutCommonHeader = false;
3179 Type *ConsideredSExtType =
3181 if (I.getType() != ConsideredSExtType)
3185 for (const User *U : I.users()) {
3187 Considerable = true;
3191 if (GEPInst->getNumOperands() > 2) {
3192 AllowPromotionWithoutCommonHeader = true;
3197 return Considerable;
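// Editorial note: hedged summary of the address-type-promotion hook above
// (identified by its signature). A sign extension is flagged as worth
// considering when it has users that are GEPs, and promotion without a common
// header is allowed once a multi-operand GEP user is seen, so the sext can be
// folded into the addressing computation.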
3202 case Instruction::Add:
3203 case Instruction::Sub:
3204 case Instruction::Mul:
3205 case Instruction::And:
3206 case Instruction::Or:
3207 case Instruction::Xor:
3208 case Instruction::FAdd:
3209 case Instruction::FSub:
3210 case Instruction::FMul:
3211 case Instruction::FDiv:
3212 case Instruction::ICmp:
3213 case Instruction::FCmp:
3215 case Instruction::Shl:
3216 case Instruction::LShr:
3217 case Instruction::AShr:
3218 case Instruction::UDiv:
3219 case Instruction::SDiv:
3220 case Instruction::URem:
3221 case Instruction::SRem:
3222 case Instruction::Select:
3223 return Operand == 1;
3230 if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
3240 switch (II->getIntrinsicID()) {
3241 case Intrinsic::fma:
3242 case Intrinsic::vp_fma:
3243 case Intrinsic::fmuladd:
3244 case Intrinsic::vp_fmuladd:
3245 return Operand == 0 || Operand == 1;
3246 case Intrinsic::vp_shl:
3247 case Intrinsic::vp_lshr:
3248 case Intrinsic::vp_ashr:
3249 case Intrinsic::vp_udiv:
3250 case Intrinsic::vp_sdiv:
3251 case Intrinsic::vp_urem:
3252 case Intrinsic::vp_srem:
3253 case Intrinsic::ssub_sat:
3254 case Intrinsic::vp_ssub_sat:
3255 case Intrinsic::usub_sat:
3256 case Intrinsic::vp_usub_sat:
3257 case Intrinsic::vp_select:
3258 return Operand == 1;
3260 case Intrinsic::vp_add:
3261 case Intrinsic::vp_mul:
3262 case Intrinsic::vp_and:
3263 case Intrinsic::vp_or:
3264 case Intrinsic::vp_xor:
3265 case Intrinsic::vp_fadd:
3266 case Intrinsic::vp_fmul:
3267 case Intrinsic::vp_icmp:
3268 case Intrinsic::vp_fcmp:
3269 case Intrinsic::smin:
3270 case Intrinsic::vp_smin:
3271 case Intrinsic::umin:
3272 case Intrinsic::vp_umin:
3273 case Intrinsic::smax:
3274 case Intrinsic::vp_smax:
3275 case Intrinsic::umax:
3276 case Intrinsic::vp_umax:
3277 case Intrinsic::sadd_sat:
3278 case Intrinsic::vp_sadd_sat:
3279 case Intrinsic::uadd_sat:
3280 case Intrinsic::vp_uadd_sat:
3282 case Intrinsic::vp_sub:
3283 case Intrinsic::vp_fsub:
3284 case Intrinsic::vp_fdiv:
3285 return Operand == 0 || Operand == 1;
3298 if (I->isBitwiseLogicOp()) {
3299 if (!I->getType()->isVectorTy()) {
3300 if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
3301 for (auto &Op : I->operands()) {
3309 } else if (I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
3310 for (auto &Op : I->operands()) {
3322 Ops.push_back(&Not);
3323 Ops.push_back(&InsertElt);
3331 if (!I->getType()->isVectorTy() || !ST->hasVInstructions())
3339 if (!ST->sinkSplatOperands())
3365 for (Use &U : Op->uses()) {
3374 Ops.push_back(&Op->getOperandUse(0));
3376 Use *InsertEltUse = &Op->getOperandUse(0);
3379 Ops.push_back(&InsertElt->getOperandUse(1));
3380 Ops.push_back(InsertEltUse);
3392 if (!ST->enableUnalignedScalarMem())
3395 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
3398 Options.AllowOverlappingLoads = true;
3399 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
3401 if (ST->is64Bit()) {
3402 Options.LoadSizes = {8, 4, 2, 1};
3403 Options.AllowedTailExpansions = {3, 5, 6};
3405 Options.LoadSizes = {4, 2, 1};
3406 Options.AllowedTailExpansions = {3};
3409 if (IsZeroCmp && ST->hasVInstructions()) {
3410 unsigned VLenB = ST->getRealMinVLen() / 8;
3413 unsigned MinSize = ST->getXLen() / 8 + 1;
3414 unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
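// Editorial note: hedged summary of the memcmp expansion options above.
// Scalar expansion is only enabled with unaligned scalar memory access and
// (for non-zero comparisons) Zbb/Zbkb, using XLEN-sized loads with a few
// allowed tail sizes; for memcmp-against-zero on V-enabled subtargets, vector
// loads are additionally permitted for sizes from just above XLEN/8 bytes up
// to VLENB times the maximum fixed-length LMUL.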
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
std::optional< unsigned > getMaxVScale() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
bool isLegalAddImmediate(int64_t imm) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isFPPredicate(Predicate P)
static bool isIntPredicate(Predicate P)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
A parsed version of the target data layout string in and methods for querying it.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Information for memory intrinsic cost model.
Align getAlignment() const
unsigned getAddressSpace() const
Type * getDataType() const
Intrinsic::ID getID() const
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
unsigned getMinTripCountTailFoldingThreshold() const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) const
Return the cost of materializing an immediate for a value operand of a store instruction.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
bool hasActiveVectorLength() const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getExpandCompressMemoryOpCost(unsigned Opcode, Type *Src, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat operand.
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}) const override
Estimate the overhead of scalarizing an instruction.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded into a target instruction during instruction selection.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1) const override
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
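The overrides listed above are normally reached through the generic TargetTransformInfo interface rather than called directly. A hedged sketch of such a query from a new-pass-manager pass; CostQueryPass is a hypothetical name, and TargetIRAnalysis must be registered with the analysis manager (PassBuilder does this by default).

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PassManager.h"

// Ask the target how expensive an add over <vscale x 4 x i32> is.
struct CostQueryPass : llvm::PassInfoMixin<CostQueryPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &FAM) {
    auto &TTI = FAM.getResult<llvm::TargetIRAnalysis>(F);
    auto *VecTy =
        llvm::VectorType::get(llvm::Type::getInt32Ty(F.getContext()),
                              llvm::ElementCount::getScalable(4));
    llvm::InstructionCost Cost = TTI.getArithmeticInstrCost(
        llvm::Instruction::Add, VecTy,
        llvm::TargetTransformInfo::TCK_RecipThroughput);
    (void)Cost;
    return llvm::PreservedAnalyses::all();
  }
};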
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector register.
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
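These LMUL helpers live inside the backend, but the arithmetic they reflect is simple: an RVV operation processes VLMAX = LMUL * VLEN / SEW elements. A purely illustrative helper follows; its name and the rational-LMUL encoding are invented for the example.

#include <cstdint>

// VLMAX for a given register width, element width, and LMUL = Num/Den.
static uint64_t illustrativeVLMAX(uint64_t VLenBits, uint64_t SEWBits,
                                  uint64_t LMULNum, uint64_t LMULDen) {
  return (VLenBits / SEWBits) * LMULNum / LMULDen;
}
// e.g. VLEN=128, SEW=32, LMUL=2   -> illustrativeVLMAX(128, 32, 2, 1) == 8
//      VLEN=128, SEW=32, LMUL=1/2 -> illustrativeVLMAX(128, 32, 1, 2) == 2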
This class represents an analyzed expression in the program.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
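A sketch of these mask predicates on hand-written masks; the function name is invented, and reading NumSrcElts/NumInputElts as the element count of each source vector is our interpretation of the interface.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"

static void maskPredicateSketch() {
  int Identity[] = {0, 1, 2, 3};
  bool IsId = llvm::ShuffleVectorInst::isIdentityMask(Identity, /*NumSrcElts=*/4);

  // {0,4,1,5,2,6,3,7} interleaves two 4-element sources with factor 2.
  int Interleave[] = {0, 4, 1, 5, 2, 6, 3, 7};
  llvm::SmallVector<unsigned, 4> StartIndexes;
  bool IsIlv = llvm::ShuffleVectorInst::isInterleaveMask(
      Interleave, /*Factor=*/2, /*NumInputElts=*/4, StartIndexes);
  (void)IsId;
  (void)IsIlv;
}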
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old number of lanes.
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
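A minimal sketch of building a scalable vector type with the Type/VectorType helpers above; the function name is made up.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static void scalableTypeSketch() {
  llvm::LLVMContext Ctx;
  llvm::Type *I32 = llvm::IntegerType::getIntNTy(Ctx, 32);
  auto *VecTy =
      llvm::VectorType::get(I32, llvm::ElementCount::getScalable(4)); // <vscale x 4 x i32>
  llvm::Type *WideTy = VecTy->getWithNewBitWidth(64);                 // <vscale x 4 x i64>
  unsigned EltBits = WideTy->getScalarSizeInBits();                   // 64
  (void)EltBits;
}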
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of the scalar value RHS.
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
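The polynomial-quantity helpers above support arithmetic like the following sketch (function name invented):

#include "llvm/Support/TypeSize.h"

static void elementCountSketch() {
  llvm::ElementCount EC = llvm::ElementCount::getScalable(8); // vscale x 8
  bool MultipleOf4 = EC.isKnownMultipleOf(4);                 // true
  llvm::ElementCount Half = EC.divideCoefficientBy(2);        // vscale x 4
  uint64_t MinElts = Half.getKnownMinValue();                 // 4
  llvm::TypeSize Bits = llvm::TypeSize::getFixed(128);        // 128 fixed bits
  (void)MultipleOf4;
  (void)MinElts;
  (void)Bits;
}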
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types and value types.
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter) to floating point.
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of type iN containing the high bits of the result.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
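A hedged sketch of the IR-level PatternMatch helpers above; the function name is invented, and the shuffle mask is deliberately not inspected (a full splat check would also require an all-zeros mask).

#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

// True for calls to llvm.fabs.*, or for an insertelement at lane 0 feeding a
// shufflevector (the common splat idiom, mask not checked).
static bool matchSketch(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  if (match(V, m_Intrinsic<llvm::Intrinsic::fabs>(m_Value(X))))
    return true;
  return match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(X), m_ZeroInt()),
                            m_Value()));
}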
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant bit, stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
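What the two mask builders above produce, for VF = 4 (function name invented):

#include "llvm/Analysis/VectorUtils.h"

static void maskBuilderSketch() {
  // createInterleaveMask(4, 2) -> {0, 4, 1, 5, 2, 6, 3, 7}
  llvm::SmallVector<int, 16> Ilv = llvm::createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  // createStrideMask(0, 2, 4)  -> {0, 2, 4, 6}  (every other lane)
  llvm::SmallVector<int, 16> Even = llvm::createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
  (void)Ilv;
  (void)Even;
}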
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
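A sketch of immediate-range arithmetic with the helpers above; the constant 12 mirrors RISC-V's signed 12-bit immediates, and the function name is invented.

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"

static void immediateSketch(uint64_t Imm) {
  bool FitsSImm12 = llvm::isInt<12>(int64_t(Imm));     // signed 12-bit range?
  bool FitsUImm5 = llvm::isUInt<5>(Imm);               // unsigned 5-bit range?
  int64_t Low12 = llvm::SignExtend64<12>(Imm);         // low 12 bits, sign-extended
  bool Pow2 = llvm::isPowerOf2_32(uint32_t(Imm));
  int TrailingZeros = llvm::countr_zero(Imm);          // 64 when Imm == 0
  unsigned FloorLog2 = llvm::Log2_32(32);              // 5
  uint64_t Chunks = llvm::divideCeil(100u, 8u);        // 13
  (void)FitsSImm12; (void)FitsUImm5; (void)Low12;
  (void)Pow2; (void)TrailingZeros; (void)FloorLog2; (void)Chunks;
}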
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
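A minimal sketch of the two alignment types above (function name invented):

#include "llvm/Support/Alignment.h"

static void alignmentSketch() {
  llvm::Align A(16);                       // must be a non-zero power of two
  uint64_t Bytes = A.value();              // 16
  llvm::MaybeAlign MA;                     // unknown alignment
  llvm::Align Fallback = MA.valueOrOne();  // Align(1)
  (void)Bytes;
  (void)Fallback;
}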
Information about a load/store intrinsic defined by the target.