#include "RISCVTargetTransformInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
#include <cmath>
#include <optional>
using namespace llvm;

#define DEBUG_TYPE "riscvtti"

static cl::opt<unsigned> RVVRegisterWidthLMUL(
    "riscv-v-register-bit-width-lmul",
    cl::desc(
        "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
        "by autovectorized code. Fractional LMULs are not supported."),
    cl::init(2), cl::Hidden);

static cl::opt<unsigned> SLPMaxVF(
    "riscv-v-slp-max-vf",
    cl::desc(
        "Overrides result used for getMaximumVF query which is used "
        "exclusively by SLP vectorizer."),
    cl::Hidden);
InstructionCost
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
                                      TTI::TargetCostKind CostKind) {
  size_t NumInstr = OpCodes.size();
  if (CostKind == TTI::TCK_CodeSize)
    return NumInstr;
  InstructionCost LMULCost = TLI->getLMULCost(VT);
  if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency))
    return LMULCost * NumInstr;
  InstructionCost Cost = 0;
  for (auto Op : OpCodes) {
    switch (Op) {
    case RISCV::VRGATHER_VI:
      Cost += TLI->getVRGatherVICost(VT);
      break;
    case RISCV::VRGATHER_VV:
      Cost += TLI->getVRGatherVVCost(VT);
      break;
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
      Cost += TLI->getVSlideVICost(VT);
      break;
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
      Cost += TLI->getVSlideVXCost(VT);
      break;
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
      // Unordered reductions take roughly log2(VL) reduction steps.
      unsigned VL = VT.getVectorMinNumElements();
      if (!VT.isFixedLengthVector())
        VL *= *getVScaleForTuning();
      Cost += Log2_32_Ceil(VL);
      break;
    }
    case RISCV::VFREDOSUM_VS: {
      // Ordered FP sum reductions are strictly sequential: one step per
      // element.
      unsigned VL = VT.getVectorMinNumElements();
      if (!VT.isFixedLengthVector())
        VL *= *getVScaleForTuning();
      Cost += VL;
      break;
    }
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
      // Mask operations work on a single mask register regardless of LMUL.
      Cost += 1;
      break;
    default:
      Cost += LMULCost;
    }
  }
  return Cost;
}
115 "getIntImmCost can only estimate cost of materialising integers");
static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
  auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
  if (!BO || !BO->hasOneUse())
    return false;
  if (BO->getOpcode() != Instruction::Shl)
    return false;
  if (!isa<ConstantInt>(BO->getOperand(1)))
    return false;
  unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
  if (isShiftedMask_64(Imm.getZExtValue())) {
    unsigned Trailing = llvm::countr_zero(Imm.getZExtValue());
    if (ShAmt == Trailing)
      return true;
  }
  return false;
}
158 "getIntImmCost can only estimate cost of materialising integers");
  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;
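  // RISC-V I-type instructions take a sign-extended 12-bit immediate, so for
  // example an add of 2047 is free while adding 2048 first needs lui/addi to
  // materialize the constant. The switch below flags opcodes that have such an
  // immediate form and returns TCC_Free for patterns already covered by
  // Zba/Zbb/Zbs instructions.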
  switch (Opcode) {
  case Instruction::GetElementPtr:
    // Never hoist any arguments to a GetElementPtr; CodeGenPrepare handles
    // sinking address computation.
    return TTI::TCC_Free;
  case Instruction::Store:
    // ...
  case Instruction::Load:
    // ...
  case Instruction::And:
    // zext.h
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
      return TTI::TCC_Free;
    // zext.w
    if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
      return TTI::TCC_Free;
    // bclri
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
      return TTI::TCC_Free;
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
        canUseShiftPair(Inst, Imm))
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Add:
    Takes12BitImm = true;
    break;
  case Instruction::Or:
  case Instruction::Xor:
    // bseti/binvi
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Mul:
    // A power of 2 is a shift; a negated power of 2 is a shift and a negate.
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
      return TTI::TCC_Free;
    // One more or less than a power of 2 can use a shift plus an add/sub.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;
    break;
  default:
    break;
  }

  if (Takes12BitImm) {
    // Check the immediate is the correct argument operand.
    if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
      if (Imm.getSignificantBits() <= 64 &&
          getTLI()->isLegalAddImmediate(Imm.getSExtValue()))
        return TTI::TCC_Free;
    }
    // Otherwise, use the full materialisation cost.
    return getIntImmCost(Imm, Ty, CostKind);
  }

  // By default, prevent hoisting.
  return TTI::TCC_Free;
}
TargetTransformInfo::PopcntSupportKind
RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
             ? TTI::PSK_FastHardware
             : TTI::PSK_Software;
}
bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
  switch (II->getIntrinsicID()) {
  default:
    return false;
  // These reductions have no native RVV instruction and must be expanded.
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:
    return true;
  }
}
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
                                        LLVMContext &C) {
  // ...
  return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
}
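// getShuffleCost maps shuffle kinds onto concrete RVV sequences: vnsrl.wi for
// deinterleaves, vrgather.vv plus index-constant materialization for
// permutes, vslideup/vslidedown for subvector insert/extract and splice, and
// vid.v + vrsub + vrgather.vv for reverse.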
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                             VectorType *Tp, ArrayRef<int> Mask,
                                             TTI::TargetCostKind CostKind,
                                             int Index, VectorType *SubTp,
                                             ArrayRef<const Value *> Args,
                                             const Instruction *CxtI) {
  Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

  // Fixed-length vectors allow more accurate costs than the generic scalable
  // fallback. The special cases below are keyed by shuffle kind.
  if (isa<FixedVectorType>(Tp)) {
    // Deinterleave-of-two shuffles lower to a single vnsrl.wi.
    if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
      MVT EltTp = LT.second.getVectorElementType();
      // ...
      if (Mask[0] == 0 || Mask[0] == 1) {
        auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
        if (equal(DeinterleaveMask, Mask))
          return LT.first *
                 getRISCVInstructionCost(RISCV::VNSRL_WI, LT.second, CostKind);
      }
    }
    // Single-source permute: one vrgather.vv plus the cost of materializing
    // the index vector. vrgather.vv with i8 indices can only address 256
    // elements, hence the element-count guard.
    if (LT.second.isFixedLengthVector() && LT.first == 1 &&
        (LT.second.getScalarSizeInBits() != 8 ||
         LT.second.getVectorNumElements() <= 256)) {
      VectorType *IdxTy =
          getVRGatherIndexType(LT.second, *ST, Tp->getContext());
      InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
      return IndexCost +
             getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
    }
    // Two-source permute: two vrgathers, two index constants, plus a mask
    // constant for merging the results.
    if (LT.second.isFixedLengthVector() && LT.first == 1 &&
        (LT.second.getScalarSizeInBits() != 8 ||
         LT.second.getVectorNumElements() <= 256)) {
      auto &C = Tp->getContext();
      VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
      VectorType *MaskTy =
          VectorType::get(IntegerType::getInt1Ty(C), Tp->getElementCount());
      InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
      InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
      return 2 * IndexCost +
             getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
                                     LT.second, CostKind) +
             MaskCost;
    }
    // A shuffle that legalizes into several full registers is decomposed into
    // one sub-shuffle per destination register.
    if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
        LT.second.isFixedLengthVector() &&
        LT.second.getVectorElementType().getSizeInBits() ==
            Tp->getElementType()->getPrimitiveSizeInBits() &&
        LT.second.getVectorNumElements() <
            cast<FixedVectorType>(Tp)->getNumElements() &&
        divideCeil(Mask.size(),
                   cast<FixedVectorType>(Tp)->getNumElements()) ==
            static_cast<unsigned>(*LT.first.getValue())) {
      unsigned NumRegs = *LT.first.getValue();
      unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
      unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
      auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);

      InstructionCost Cost = 0;
      for (unsigned I = 0; I < NumRegs; ++I) {
        bool IsSingleVector = true;
        SmallVector<int> SubMask(SubVF, PoisonMaskElem);
        transform(Mask.slice(I * SubVF,
                             I == NumRegs - 1 ? Mask.size() % SubVF : SubVF),
                  SubMask.begin(), [&](int I) {
                    bool SingleSubVector = I / VF == 0;
                    IsSingleVector &= SingleSubVector;
                    return (SingleSubVector ? 0 : 1) * SubVF + I % VF;
                  });
        Cost += getShuffleCost(IsSingleVector ? TTI::SK_PermuteSingleSrc
                                              : TTI::SK_PermuteTwoSrc,
                               SubVecTy, SubMask, CostKind, 0, nullptr);
      }
      return Cost;
    }
  }

  // Handle scalable vectors (and fixed vectors legalized to scalable).
  switch (Kind) {
  default:
    break;
  case TTI::SK_ExtractSubvector:
    // Extracting at a register-group boundary is a subregister extract, and
    // therefore free, but only when the exact VLEN is known.
    if (std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
        SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
      const unsigned MinVLen = ST->getRealMinVLen();
      const unsigned MaxVLen = ST->getRealMaxVLen();
      if (MinVLen == MaxVLen &&
          SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
          SubLT.second.getSizeInBits() <= MinVLen)
        return TTI::TCC_Free;
    }
    // Example sequence: vslidedown.vi v8, v9, 2
    return LT.first *
           getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
  case TTI::SK_InsertSubvector:
    // Example sequence: vslideup.vi v8, v9, 2
    return LT.first *
           getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
  case TTI::SK_Select:
    // Example sequence: vmv.s.x v0, a0; vmerge.vvm v8, v9, v8, v0
    return LT.first *
           (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
                                        LT.second, CostKind));
  case TTI::SK_Broadcast: {
    bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
                                           Instruction::InsertElement);
    if (LT.second.getScalarSizeInBits() == 1) {
      if (HasScalar)
        // Example sequence: vmv.v.x v8, a0; vmsne.vi v0, v8, 0
        return LT.first *
               (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
                                            LT.second, CostKind));
      // Example sequence: vmv.v.i; vmerge.vim; vmv.x.s; vmv.v.x; vmsne.vi
      return LT.first *
             (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
                                           RISCV::VMV_X_S, RISCV::VMV_V_X,
                                           RISCV::VMSNE_VI},
                                          LT.second, CostKind));
    }
    if (HasScalar)
      // Example sequence: vmv.v.x v8, a0
      return LT.first *
             getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
    // Example sequence: vrgather.vi v9, v8, 0
    return LT.first *
           getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
  }
  case TTI::SK_Splice: {
    // vslidedown + vslideup; the immediate forms apply when the offset fits
    // in 5 bits.
    unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
    if (Index >= 0 && Index < 32)
      Opcodes[0] = RISCV::VSLIDEDOWN_VI;
    else if (Index < 0 && Index > -32)
      Opcodes[1] = RISCV::VSLIDEUP_VI;
    return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  }
  case TTI::SK_Reverse: {
    // At low LMUL, most of the cost is producing the vrgather index register;
    // at high LMUL the vrgather itself dominates.
    InstructionCost LenCost = 3;
    if (LT.second.isFixedLengthVector())
      // vrsub.vi has a 5-bit immediate field; otherwise an li suffices.
      LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
    unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
    if (LT.second.isFixedLengthVector() &&
        isInt<5>(LT.second.getVectorNumElements() - 1))
      Opcodes[1] = RISCV::VRSUB_VI;
    InstructionCost GatherCost =
        getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    // Mask vectors additionally require an extend and a truncate.
    InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
    return LT.first * (LenCost + GatherCost + ExtendCost);
  }
  }
  return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}
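// Interleaved accesses whose factor is supported by the target can be lowered
// to segment loads/stores (vlseg<N>e<EEW>.v / vsseg<N>e<EEW>.v) and cost
// roughly one legal memory operation; otherwise the model is a wide memory op
// plus the shuffles needed to (de)interleave the lanes.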
InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (isa<ScalableVectorType>(VecTy) && Factor != 2)
    return InstructionCost::getInvalid();

  if (!UseMaskForCond && !UseMaskForGaps &&
      Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    auto *VTy = cast<VectorType>(VecTy);
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy);
    if (LT.second.isVector()) {
      auto *SubVecTy =
          VectorType::get(VTy->getElementType(),
                          VTy->getElementCount().divideCoefficientBy(Factor));
      if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
          TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
                                            AddressSpace, DL)) {
        // Segment access: cost as a single legal memory operation.
        auto *LegalVTy = VectorType::get(VTy->getElementType(),
                                         LT.second.getVectorElementCount());
        InstructionCost LegalMemCost = getMemoryOpCost(
            Opcode, LegalVTy, Alignment, AddressSpace, CostKind);
        return LT.first + LegalMemCost;
      }
    }
  }

  if (isa<ScalableVectorType>(VecTy))
    return InstructionCost::getInvalid();

  auto *FVTy = cast<FixedVectorType>(VecTy);
  InstructionCost MemCost =
      getMemoryOpCost(Opcode, FVTy, Alignment, AddressSpace, CostKind);
  unsigned VF = FVTy->getNumElements() / Factor;

  if (Opcode == Instruction::Load) {
    InstructionCost Cost = MemCost;
    for (unsigned Index : Indices) {
      // ... one stride-mask shuffle per extracted lane group ...
    }
    return Cost;
  }

  if (Factor != 2)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  assert(Opcode == Instruction::Store && "Opcode must be a store");
  // An interleaving store of two vectors is one large interleaving shuffle
  // feeding the wide store.
  // ...
  return MemCost + ShuffleCost;
}
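// Gathers and scatters (indexed accesses) are modeled as one scalar-element
// memory operation per lane, scaled by the estimated VL, matching hardware
// that performs an independent access per element.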
InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  if ((Opcode == Instruction::Load &&
       !isLegalMaskedGather(DataTy, Align(Alignment))) ||
      (Opcode == Instruction::Store &&
       !isLegalMaskedScatter(DataTy, Align(Alignment))))
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  // Cost is proportional to the number of memory operations implied. For
  // scalable vectors, use an estimate, since the exact VL is unknown.
  auto &VTy = *cast<VectorType>(DataTy);
  InstructionCost MemOpCost =
      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
}
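// Strided accesses (vlse/vsse) use the same per-element model as
// gather/scatter: only the legality check differs; the cost is still
// NumLoads * MemOpCost.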
InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
       !isLegalStridedLoadStore(DataTy, Alignment)) ||
      (Opcode != Instruction::Load && Opcode != Instruction::Store))
    return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  // Cost is proportional to the number of memory operations implied. For
  // scalable vectors, use an estimate, since the exact VL is unknown.
  auto &VTy = *cast<VectorType>(DataTy);
  InstructionCost MemOpCost =
      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
}
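// Pseudo-scalarized cost table: each entry is the approximate number of RVV
// instructions needed to emulate the intrinsic for the given element type,
// e.g. a vector bitreverse over i64 elements is modeled as 52 instructions.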
static const CostTblEntry VectorIntrinsicCostTable[]{
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::lrint, MVT::i32, 1},
    {Intrinsic::lrint, MVT::i64, 1},
    {Intrinsic::llrint, MVT::i64, 1},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
};
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) {
  switch (ID) {
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
  case Intrinsic::VPID:                                                        \
    return ISD::VPSD;
#include "llvm/IR/VPIntrinsics.def"
#undef HELPER_MAP_VPID_TO_VPSD
  }
  return ISD::DELETED_NODE;
}
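// getIntrinsicInstrCost special-cases intrinsics with cheap RVV lowerings
// (min/max, saturating arithmetic, abs, stepvector, get_active_lane_mask, the
// VP rounding operations) and otherwise consults the cost table above before
// deferring to the generic implementation.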
InstructionCost
RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                    TTI::TargetCostKind CostKind) {
  auto *RetTy = ICA.getReturnType();
  switch (ICA.getID()) {
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
    // These all use the same code.
    auto LT = getTypeLegalizationCost(RetTy);
    if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
      return LT.first * 8;
    break;
  }
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    auto LT = getTypeLegalizationCost(RetTy);
    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
      return LT.first;
    if (ST->hasVInstructions() && LT.second.isVector()) {
      unsigned Op;
      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
        break;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
        break;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
        break;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;
        break;
      }
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
    }
    break;
  }
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    // One native vector instruction each.
    auto LT = getTypeLegalizationCost(RetTy);
    if (ST->hasVInstructions() && LT.second.isVector())
      return LT.first;
    break;
  }
  case Intrinsic::ctpop: {
    // A single vcpop.v when Zvbb is available.
    auto LT = getTypeLegalizationCost(RetTy);
    if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector())
      return LT.first;
    break;
  }
  case Intrinsic::abs: {
    auto LT = getTypeLegalizationCost(RetTy);
    if (ST->hasVInstructions() && LT.second.isVector()) {
      // vrsub.vi v10, v8, 0
      // vmax.vv v8, v8, v10
      return LT.first * 2;
    }
    break;
  }
  case Intrinsic::get_active_lane_mask: {
    if (ST->hasVInstructions()) {
      Type *ExpRetTy = VectorType::get(
          ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
      auto LT = getTypeLegalizationCost(ExpRetTy);
      // vid.v v8 (considered hoisted)
      // vsaddu.vx v8, v8, a0
      // vmsltu.vx v0, v8, a1
      return LT.first *
             getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
                                     LT.second, CostKind);
    }
    break;
  }
  case Intrinsic::experimental_stepvector: {
    auto LT = getTypeLegalizationCost(RetTy);
    // Legalisation of illegal types involves an `index' instruction plus
    // (LT.first - 1) vector adds.
    if (ST->hasVInstructions())
      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
             (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);
  }
  case Intrinsic::experimental_cttz_elts: {
    Type *ArgTy = ICA.getArgTypes()[0];
    EVT ArgType = TLI->getValueType(DL, ArgTy, true);
    if (getTLI()->shouldExpandCttzElements(ArgType))
      break;
    InstructionCost Cost = getRISCVInstructionCost(
        RISCV::VFIRST_M, getTypeLegalizationCost(ArgTy).second, CostKind);

    // If zero_is_poison is false, add a cmp + select to convert -1 to EVL.
    Type *BoolTy = Type::getInt1Ty(RetTy->getContext());
    if (ICA.getArgs().size() > 1 &&
        cast<ConstantInt>(ICA.getArgs()[1])->isZero())
      Cost += getCmpSelInstrCost(Instruction::ICmp, BoolTy, RetTy,
                                 CmpInst::ICMP_SLT, CostKind) +
              getCmpSelInstrCost(Instruction::Select, RetTy, BoolTy,
                                 CmpInst::BAD_ICMP_PREDICATE, CostKind);
    return Cost;
  }
  case Intrinsic::vp_rint: {
    // RISC-V uses at least 5 instructions to lower rounding intrinsics.
    unsigned Cost = 5;
    auto LT = getTypeLegalizationCost(RetTy);
    if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
      return Cost * LT.first;
    break;
  }
  case Intrinsic::vp_nearbyint: {
    // One more read and one more write for fflags than vp_rint.
    unsigned Cost = 7;
    auto LT = getTypeLegalizationCost(RetTy);
    if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
      return Cost * LT.first;
    break;
  }
  case Intrinsic::vp_ceil:
  case Intrinsic::vp_floor:
  case Intrinsic::vp_round:
  case Intrinsic::vp_roundeven:
  case Intrinsic::vp_roundtozero: {
    // Two more instructions than vp_rint to swap and restore FRM.
    unsigned Cost = 7;
    auto LT = getTypeLegalizationCost(RetTy);
    unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
    if (TLI->isOperationCustom(VPISD, LT.second))
      return Cost * LT.first;
    break;
  }
  }

  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
    if (auto LT = getTypeLegalizationCost(RetTy); LT.second.isVector()) {
      MVT EltTy = LT.second.getVectorElementType();
      if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
                                              ICA.getID(), EltTy))
        return LT.first * Entry->Cost;
    }
  }

  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
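// Casts are priced by the number of widening/narrowing steps: a single
// vsext/vzext .vf2/.vf4/.vf8 covers integer extends of up to 8x, while FP and
// truncating conversions chain one instruction per halving or doubling of the
// element size.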
InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                               Type *Src,
                                               TTI::CastContextHint CCH,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
  if (!IsVectorType)
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

  bool IsTypeLegal = isTypeLegal(Src) && isTypeLegal(Dst) &&
                     (Src->getScalarSizeInBits() <= ST->getELen()) &&
                     (Dst->getScalarSizeInBits() <= ST->getELen());
  if (!IsTypeLegal)
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

  std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
                (int)Log2_32(Src->getScalarSizeInBits());
  switch (ISD) {
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    const unsigned SrcEltSize = Src->getScalarSizeInBits();
    if (SrcEltSize == 1) {
      // Extending from a mask vector: vmv.v.i v8, 0; vmerge.vim v8, v8, -1, v0
      return getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM},
                                     DstLT.second, CostKind);
    }
    if ((PowDiff < 1) || (PowDiff > 3))
      return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
    unsigned Op =
        (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
  }
  case ISD::TRUNCATE:
    if (Dst->getScalarSizeInBits() == 1) {
      // Truncating to a mask vector: vand.vi v8, v8, 1; vmsne.vi v0, v8, 0
      return getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
                                     DstLT.second, CostKind);
    }
    [[fallthrough]];
  case ISD::FP_EXTEND:
  case ISD::FP_ROUND: {
    // Count the narrowing/widening steps.
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    unsigned Op = (ISD == ISD::TRUNCATE)      ? RISCV::VNSRL_WI
                  : (DstEltSize > SrcEltSize) ? RISCV::VFWCVT_F_F_V
                                              : RISCV::VFNCVT_F_F_W;
    InstructionCost Cost = 0;
    for (; SrcEltSize != DstEltSize;) {
      MVT ElementMVT = (ISD == ISD::TRUNCATE)
                           ? MVT::getIntegerVT(DstEltSize)
                           : MVT::getFloatingPointVT(DstEltSize);
      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
      DstEltSize =
          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
      Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
    }
    return Cost;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    if (Src->getScalarSizeInBits() == 1 || Dst->getScalarSizeInBits() == 1) {
      // Converting to or from a mask vector cannot use PowDiff; it needs a
      // mask expand (vmv.v.i + vmerge.vim) or a compare (vand.vi + vmsne.vi)
      // around the conversion.
      return 3;
    }
    if (std::abs(PowDiff) <= 1)
      return 1;
    // The backend can lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.vf2 i8),
    // so it only needs two conversions.
    if (Src->isIntOrIntVectorTy())
      return 2;
    // FP widening and narrowing chain one conversion per step.
    return std::abs(PowDiff);
  }
  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
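// The exact VL of a scalable vector is unknown at compile time, so costs that
// scale with the element count use VLMAX derived from the tuning vscale.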
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
  if (isa<ScalableVectorType>(Ty)) {
    const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
    const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
    const unsigned VectorBits = *getVScaleForTuning() * RISCV::RVVBitsPerBlock;
    return RISCVTargetLowering::computeVLMAX(VectorBits, EltSize, MinSize);
  }
  return cast<FixedVectorType>(Ty)->getNumElements();
}
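// Min/max reductions are one vred{min,max}[u].vs / vfred{min,max}.vs plus
// scalar moves in and out of the vector unit. maximum/minimum must also
// propagate NaNs, which adds a vmfne + vcpop check when no-NaNs cannot be
// assumed.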
InstructionCost
RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                     FastMathFlags FMF,
                                     TTI::TargetCostKind CostKind) {
  // ...
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  if (Ty->getElementType()->isIntegerTy(1)) {
    // SelectionDAGBuilder does the following transforms:
    //   vector_reduce_{smin,umax}(<n x i1>) --> vector_reduce_or(<n x i1>)
    //   vector_reduce_{smax,umin}(<n x i1>) --> vector_reduce_and(<n x i1>)
    if (IID == Intrinsic::umax || IID == Intrinsic::smin)
      return getArithmeticReductionCost(Instruction::Or, Ty, FMF, CostKind);
    else
      return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind);
  }

  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
    SmallVector<unsigned, 3> Opcodes;
    InstructionCost ExtraCost = 0;
    switch (IID) {
    case Intrinsic::maximum:
      if (FMF.noNaNs()) {
        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
      } else {
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
                   RISCV::VFMV_F_S};
        // ExtraCost covers materializing the canonical NaN plus the branch.
        // ...
      }
      break;
    case Intrinsic::minimum:
      if (FMF.noNaNs()) {
        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
      } else {
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
                   RISCV::VFMV_F_S};
        // ...
      }
      break;
    }
    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  }

  // An IR min/max reduction is two vmv instructions plus one RVV reduction.
  unsigned SplitOp;
  SmallVector<unsigned, 3> Opcodes;
  switch (IID) {
  default:
    llvm_unreachable("Unsupported intrinsic");
  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
    break;
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
    break;
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
    break;
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
    break;
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
    break;
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
    break;
  }
  // Splitting the legalized type adds (LT.first - 1) binary min/max ops.
  InstructionCost SplitCost =
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
                     : 0;
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
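// Arithmetic reductions: i1 reductions become mask ops (vmnand/vcpop plus a
// scalar compare), ordered FP adds need one vfredosum.vs per legalized part,
// and everything else is vmv.s.x + vred*.vs + vmv.x.s with (LT.first - 1)
// extra binary ops when the type splits across registers.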
InstructionCost
RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                         std::optional<FastMathFlags> FMF,
                                         TTI::TargetCostKind CostKind) {
  // ...
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  SmallVector<unsigned, 3> Opcodes;
  Type *ElementTy = Ty->getElementType();
  if (ElementTy->isIntegerTy(1)) {
    if (ISD == ISD::AND) {
      // Example: vmnand.mm v8, v0, v0; vcpop.m a0, v8; seqz a0, a0
      Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M};
      return (LT.first - 1) +
             getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
             getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
                                CmpInst::ICMP_EQ, CostKind);
    } else {
      // Example: vcpop.m a0, v0; snez a0, a0
      Opcodes = {RISCV::VCPOP_M};
      return (LT.first - 1) +
             getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
             getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
                                CmpInst::ICMP_NE, CostKind);
    }
  }

  // Ordered FP adds use one strictly-ordered vfredosum per legalized part.
  if (TTI::requiresOrderedReduction(FMF)) {
    Opcodes.push_back(RISCV::VFMV_S_F);
    for (unsigned i = 0; i < LT.first.getValue(); i++)
      Opcodes.push_back(RISCV::VFREDOSUM_VS);
    Opcodes.push_back(RISCV::VFMV_F_S);
    return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  }

  unsigned SplitOp;
  switch (ISD) {
  case ISD::ADD:
    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
    break;
  case ISD::OR:
    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
    break;
  case ISD::XOR:
    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
    break;
  case ISD::AND:
    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
    break;
  case ISD::FADD:
    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
    break;
  }
  // Splitting the legalized type adds (LT.first - 1) binary ops.
  InstructionCost SplitCost =
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
                     : 0;
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
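// Widening (extended) add reductions match vwredsum[u].vs when the result is
// exactly twice the element width, so they cost no more than the plain
// reduction; other combinations fall back to the generic model.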
InstructionCost RISCVTTIImpl::getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
    FastMathFlags FMF, TTI::TargetCostKind CostKind) {
  // ...
  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
    return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
                                           FMF, CostKind);

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
  // The widening reduction requires the result to be exactly twice the
  // legalized element width.
  // ...
  return (LT.first - 1) +
         getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
}
InstructionCost RISCVTTIImpl::getStoreImmCost(Type *Ty,
                                              TTI::OperandValueInfo OpInfo,
                                              TTI::TargetCostKind CostKind) {
  assert(OpInfo.isConstant() && "non constant operand?");
  if (!isa<VectorType>(Ty))
    // TODO: Accounting for scalar immediate materialization needs more
    // knowledge about the immediate than is available here.
    return 0;

  if (OpInfo.isUniform())
    // A splat constant is a single vmv.v.i, vmv.v.x, or vfmv.v.f.
    return 1;

  return getConstantPoolLoadCost(Ty, CostKind);
}
InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                              MaybeAlign Alignment,
                                              unsigned AddressSpace,
                                              TTI::TargetCostKind CostKind,
                                              TTI::OperandValueInfo OpInfo,
                                              const Instruction *I) {
  EVT VT = TLI->getValueType(DL, Src, true);
  // Type legalization can't handle structs.
  if (VT == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, OpInfo, I);

  InstructionCost Cost = 0;
  if (Opcode == Instruction::Store && OpInfo.isConstant())
    Cost += getStoreImmCost(Src, OpInfo, CostKind);
  // ... BaseCost from the generic model, scaled by LMUL for vector types ...
  return Cost + BaseCost;
}
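// Vector selects lower to a single vmerge.vvm; i1-valued selects have no
// merge form and are emulated with mask logic (vmandn/vmand/vmor). FP
// compares are bucketed by how many mask instructions each predicate needs.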
InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                                 Type *CondTy,
                                                 CmpInst::Predicate VecPred,
                                                 TTI::TargetCostKind CostKind,
                                                 const Instruction *I) {
  // ...
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
    if (CondTy->isVectorTy()) {
      if (ValTy->getScalarSizeInBits() == 1) {
        // vmandn.mm v8, v8, v9; vmand.mm v9, v0, v9; vmor.mm v0, v9, v8
        return LT.first *
               getRISCVInstructionCost(
                   {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
                   LT.second, CostKind);
      }
      // vselect and max/min are supported natively.
      return LT.first *
             getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
    }
    if (ValTy->getScalarSizeInBits() == 1) {
      // Scalar condition with i1 values: splat the condition, compare it to
      // zero, then do the mask logic.
      MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
      return LT.first *
                 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
                                         InterimVT, CostKind) +
             LT.first * getRISCVInstructionCost(
                            {RISCV::VMANDN_MM, RISCV::VMAND_MM,
                             RISCV::VMOR_MM},
                            LT.second, CostKind);
    }
    // vmv.v.x v10, a0; vmsne.vi v0, v10, 0; vmerge.vvm v8, v9, v8, v0
    return LT.first * getRISCVInstructionCost(
                          {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
                          LT.second, CostKind);
  }

  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
      CmpInst::isIntPredicate(VecPred)) {
    // All integer compares lower to one instruction; use VMSLT_VV as the
    // representative.
    return LT.first *
           getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind);
  }

  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
      CmpInst::isFPPredicate(VecPred)) {
    // FCMP_TRUE/FCMP_FALSE materialize an all-ones/all-zeros mask directly.
    if ((VecPred == CmpInst::FCMP_FALSE) || (VecPred == CmpInst::FCMP_TRUE))
      return getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
    switch (VecPred) {
    case CmpInst::FCMP_ONE: // vmflt.vv + vmflt.vv + vmor.mm
    case CmpInst::FCMP_ORD: // vmfeq.vv + vmfeq.vv + vmand.mm
    case CmpInst::FCMP_UNO: // vmfne.vv + vmfne.vv + vmor.mm
    case CmpInst::FCMP_UEQ: // vmflt.vv + vmflt.vv + vmnor.mm
      return LT.first * getRISCVInstructionCost(
                            {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
                            LT.second, CostKind);
    case CmpInst::FCMP_UGT: // compare + vmnot.m
    case CmpInst::FCMP_UGE:
    case CmpInst::FCMP_ULT:
    case CmpInst::FCMP_ULE:
      return LT.first *
             getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
                                     LT.second, CostKind);
    case CmpInst::FCMP_OEQ: // single compare
    case CmpInst::FCMP_OGT:
    case CmpInst::FCMP_OGE:
    case CmpInst::FCMP_OLT:
    case CmpInst::FCMP_OLE:
    case CmpInst::FCMP_UNE:
      return LT.first *
             getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
    default:
      break;
    }
  }
  // With conditional-move fusion, a scalar icmp whose only users are integer
  // selects folds into the selects, so the compare itself is free.
  if (ST->hasConditionalMoveFusion() && I && isa<ICmpInst>(I) &&
      ValTy->isIntegerTy() && !I->user_empty()) {
    if (all_of(I->users(), [&](const User *U) {
          return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
                 U->getType()->isIntegerTy() &&
                 !isa<ConstantData>(U->getOperand(1)) &&
                 !isa<ConstantData>(U->getOperand(2));
        }))
      return 0;
  }

  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}

InstructionCost RISCVTTIImpl::getCFInstrCost(unsigned Opcode,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) {
  if (CostKind != TTI::TCK_RecipThroughput)
    return Opcode == Instruction::PHI ? 0 : 1;
  // Branches are assumed to be predicted.
  return 0;
}
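// Element insert/extract with a constant index is a vslide plus a vmv between
// scalar and vector registers; a variable index on a type that legalizes to
// scalars is modeled as a stack round-trip of loads and stores.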
InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                 TTI::TargetCostKind CostKind,
                                                 unsigned Index, Value *Op0,
                                                 Value *Op1) {
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)
    return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);

  // This type is legalized to a scalar type.
  if (!LT.second.isVector()) {
    auto *FixedVecTy = cast<FixedVectorType>(Val);
    // If Index is a known constant, the cost is zero.
    if (Index != -1U)
      return 0;
    // With a variable index, estimate a stack round-trip: store the vector,
    // access the element, (re)load.
    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();
    auto Align = DL.getPrefTypeAlign(ElemTy);
    InstructionCost LoadCost =
        getMemoryOpCost(Instruction::Load, ElemTy, Align, 0, CostKind);
    InstructionCost StoreCost =
        getMemoryOpCost(Instruction::Store, ElemTy, Align, 0, CostKind);
    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;
  }

  // For unsupported scalable vectors.
  if (LT.second.isScalableVector() && !LT.first.isValid())
    return LT.first;

  // Mask vectors are expanded through i8.
  if (Val->getScalarSizeInBits() == 1) {
    VectorType *WideTy =
        VectorType::get(IntegerType::get(Val->getContext(), 8),
                        cast<VectorType>(Val)->getElementCount());
    if (Opcode == Instruction::ExtractElement) {
      // zext to the i8 vector, then extract.
      // ...
      return ExtendCost + ExtractCost;
    }
    // zext, insert, then truncate back to the mask vector.
    // ...
    return ExtendCost + InsertCost + TruncCost;
  }

  // Extract is vslidedown + vmv.x.s; insert is vmv.s.x + vslideup.
  unsigned BaseCost = 1;
  // When inserting, the index also needs incrementing for the vslideup.
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;

  if (Index != -1U) {
    // The type may be split; normalize the index to the legalized type.
    if (LT.second.isFixedLengthVector()) {
      unsigned Width = LT.second.getVectorNumElements();
      Index = Index % Width;
    }
    // The first element is accessible without any slide.
    if (Index == 0)
      SlideCost = 0;
    else if (Opcode == Instruction::InsertElement)
      SlideCost = 1; // With a constant index, no addi is needed.
  }

  // i64 extract/insert on RV32 needs extra instructions.
  if (Val->getScalarType()->isIntegerTy() &&
      ST->getXLen() < Val->getScalarSizeInBits())
    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;

  return BaseCost + SlideCost;
}
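// Binary arithmetic maps 1:1 onto the corresponding .vv instruction; constant
// operands additionally pay for materializing the splat or constant-pool
// vector unless the constant can be folded into a .vi/.vx form.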
InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) {
  // ...
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  if (!LT.second.isVector())
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);

  auto getConstantMatCost =
      [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
    if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
      // Splattable constants either fold into a .vi/.vx form or are
      // materialized in a scalar register first, which is costed with the
      // scalar constant itself.
      return 0;
    return getConstantPoolLoadCost(Ty, CostKind);
  };

  // Add the cost of materializing any constant vectors required.
  InstructionCost ConstantMatCost = 0;
  if (Op1Info.isConstant())
    ConstantMatCost += getConstantMatCost(0, Op1Info);
  if (Op2Info.isConstant())
    ConstantMatCost += getConstantMatCost(1, Op2Info);

  unsigned Op;
  switch (TLI->InstructionOpcodeToISD(Opcode)) {
  case ISD::ADD:
  case ISD::SUB:
    Op = RISCV::VADD_VV;
    break;
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
    Op = RISCV::VSLL_VV;
    break;
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
    break;
  case ISD::MUL:
  case ISD::MULHS:
  case ISD::MULHU:
    Op = RISCV::VMUL_VV;
    break;
  case ISD::SDIV:
  case ISD::UDIV:
    Op = RISCV::VDIV_VV;
    break;
  case ISD::SREM:
  case ISD::UREM:
    Op = RISCV::VREM_VV;
    break;
  case ISD::FADD:
  case ISD::FSUB:
    Op = RISCV::VFADD_VV;
    break;
  case ISD::FMUL:
    Op = RISCV::VFMUL_VV;
    break;
  case ISD::FDIV:
    Op = RISCV::VFDIV_VV;
    break;
  case ISD::FNEG:
    Op = RISCV::VFSGNJN_VV;
    break;
  default:
    // Assume all other instructions cost the same until a need arises to
    // differentiate them.
    return ConstantMatCost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
                                                           Op1Info, Op2Info,
                                                           Args, CxtI);
  }
  return ConstantMatCost +
         LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
}
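// getPointersChainCost treats same-base GEPs with all-constant indices, and
// unit-stride chains that fit a legal addressing mode, as folded into the
// access; every other pointer in the chain costs one scalar add.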
InstructionCost RISCVTTIImpl::getPointersChainCost(
    ArrayRef<const Value *> Ptrs, const Value *Base,
    const TTI::PointersChainInfo &Info, Type *AccessTy,
    TTI::TargetCostKind CostKind) {
  InstructionCost Cost = TTI::TCC_Free;
  // ... Stride is the store size of AccessTy ...
  for (auto [I, V] : enumerate(Ptrs)) {
    const auto *GEP = dyn_cast<GetElementPtrInst>(V);
    if (!GEP)
      continue;
    if (Info.isSameBase() && V != Base) {
      if (GEP->hasAllConstantIndices())
        continue;
      // If the chain is unit-stride and BaseReg + stride*i is a legal
      // addressing mode, presume the base GEP is sitting in a register
      // somewhere and gets folded into the addressing mode of each access.
      if (Info.isUnitStride() &&
          isLegalAddressingMode(AccessTy,
                                /* BaseGV */ nullptr,
                                /* BaseOffset */ Stride * I,
                                /* HasBaseReg */ true,
                                /* Scale */ 0,
                                GEP->getType()->getPointerAddressSpace()))
        continue;
      Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind,
                                     {TTI::OK_AnyValue, TTI::OP_None},
                                     {TTI::OK_AnyValue, TTI::OP_None},
                                     std::nullopt);
    } else {
      // ...
    }
  }
  return Cost;
}
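// Runtime-unrolling heuristics, used when the subtarget opts out of the
// default policy: only small loops with simple control flow, no calls, and no
// vector code are considered worth unrolling.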
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  if (ST->enableDefaultUnroll())
    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);

  // Enable upper-bound unrolling universally, independent of the conditions
  // below.
  UP.UpperBound = true;

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Only allow one exit besides the latch; this mirrors the profitability
  // check of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body; four blocks allow an if-then-else
  // diamond.
  if (L->getNumBlocks() > 4)
    return;

  // Scan the loop: don't unroll loops containing calls (which could inhibit
  // inlining) or vector instructions.
  InstructionCost Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())
        return;
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        // ...
        return;
      }
      // ... accumulate the size-and-latency cost of each instruction ...
    }
  }
  // ... set partial/runtime unrolling parameters based on Cost ...
}
unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
  if (SLPMaxVF.getNumOccurrences())
    return SLPMaxVF;
  // Return how many elements fit in the fixed-width vector register; this is
  // the same computation the LoopVectorizer uses.
  TypeSize RegWidth =
      getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
}
bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
  auto *VTy = dyn_cast<VectorType>(DataTy);
  if (!VTy || VTy->isScalableTy())
    return false;
  if (!isLegalMaskedLoadStore(DataTy, Alignment))
    return false;
  return true;
}
bool RISCVTTIImpl::areInlineCompatible(const Function *Caller,
                                       const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Inline a callee if its target features are a subset of the caller's
  // target features.
  return (CallerBits & CalleeBits) == CalleeBits;
}