#define DEBUG_TYPE "riscvtti"
24 "riscv-v-register-bit-width-lmul",
26 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
27 "by autovectorized code. Fractional LMULs are not supported."),
33 "Overrides result used for getMaximumVF query which is used "
34 "exclusively by SLP vectorizer."),
InstructionCost
RISCVTTIImpl::getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
                                      TTI::TargetCostKind CostKind) {
  size_t NumInstr = OpCodes.size();
  if (CostKind == TTI::TCK_CodeSize)
    return NumInstr;
  InstructionCost LMULCost = TLI->getLMULCost(VT);
  if ((CostKind != TTI::TCK_RecipThroughput) && (CostKind != TTI::TCK_Latency))
    return LMULCost * NumInstr;
  InstructionCost Cost = 0;
  for (auto Op : OpCodes) {
    switch (Op) {
    case RISCV::VRGATHER_VI:
      Cost += TLI->getVRGatherVICost(VT);
      break;
    case RISCV::VRGATHER_VV:
      Cost += TLI->getVRGatherVVCost(VT);
      break;
    case RISCV::VSLIDEUP_VI:
    case RISCV::VSLIDEDOWN_VI:
      Cost += TLI->getVSlideVICost(VT);
      break;
    case RISCV::VSLIDEUP_VX:
    case RISCV::VSLIDEDOWN_VX:
      Cost += TLI->getVSlideVXCost(VT);
      break;
    case RISCV::VREDMAX_VS:
    case RISCV::VREDMIN_VS:
    case RISCV::VREDMAXU_VS:
    case RISCV::VREDMINU_VS:
    case RISCV::VREDSUM_VS:
    case RISCV::VREDAND_VS:
    case RISCV::VREDOR_VS:
    case RISCV::VREDXOR_VS:
    case RISCV::VFREDMAX_VS:
    case RISCV::VFREDMIN_VS:
    case RISCV::VFREDUSUM_VS: {
      // Unordered reductions take ceil(log2(VL)) reduction steps.
      unsigned VL = VT.getVectorMinNumElements();
      if (!VT.isFixedLengthVector())
        VL *= *getVScaleForTuning();
      Cost += Log2_32_Ceil(VL);
      break;
    }
    case RISCV::VFREDOSUM_VS: {
      // An ordered FP sum is serialized: one step per element.
      unsigned VL = VT.getVectorMinNumElements();
      if (!VT.isFixedLengthVector())
        VL *= *getVScaleForTuning();
      Cost += VL;
      break;
    }
    case RISCV::VMANDN_MM:
    case RISCV::VMNAND_MM:
    case RISCV::VFIRST_M:
      // Whole-mask-register operations are LMUL-independent.
      Cost += 1;
      break;
    default:
      Cost += LMULCost;
    }
  }
  return Cost;
}
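// Worked example (illustrative, assuming getLMULCost(m2) == 2): costing
// {VADD_VV, VFREDUSUM_VS} at m2 with 8 elements under TCK_RecipThroughput
// gives 2 (LMUL cost of the add, via the default case) + 3 (ceil(log2(8))
// reduction steps) = 5, whereas TCK_CodeSize simply returns 2, one per opcode.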
113 "getIntImmCost can only estimate cost of materialising integers");
// Look for patterns of shift followed by AND that can be turned into a pair
// of shifts: no immediate needs to be materialised for the AND, so it can be
// considered free.
static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
  uint64_t Mask = Imm.getZExtValue();
  auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
  if (!BO || !BO->hasOneUse())
    return false;
  if (BO->getOpcode() != Instruction::Shl)
    return false;
  if (!isa<ConstantInt>(BO->getOperand(1)))
    return false;
  unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
  // (and (shl x, c2), c1) becomes (srli (slli x, c2+c3), c3) when c1 is a
  // shifted mask whose trailing-zero count matches the shift amount.
  if (isShiftedMask_64(Mask)) {
    unsigned Trailing = llvm::countr_zero(Mask);
    if (ShAmt == Trailing)
      return true;
  }
  return false;
}
156 "getIntImmCost can only estimate cost of materialising integers");
164 bool Takes12BitImm =
false;
165 unsigned ImmArgIdx = ~0U;
  switch (Opcode) {
  case Instruction::GetElementPtr:
    // Never hoist any arguments to a GetElementPtr; CodeGenPrepare splits up
    // large GEP offsets better than ConstantHoisting can.
    return TTI::TCC_Free;
  case Instruction::Store: {
    // Use the materialization cost whether the address or the value is the
    // constant, except when the store is misaligned and misaligned accesses
    // are not legal (constant hoisting can be harmful in such cases).
    if (Idx == 1 || !Inst)
      return getIntImmCost(Imm, Ty, CostKind);
    // ...
    return getIntImmCost(Imm, Ty, CostKind);
  }
  case Instruction::Load:
    // If the address is a constant, use the materialization cost.
    return getIntImmCost(Imm, Ty, CostKind);
  case Instruction::And:
    // zext.h
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
      return TTI::TCC_Free;
    // zext.w
    if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
      return TTI::TCC_Free;
    // bclri
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
      return TTI::TCC_Free;
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
        canUseShiftPair(Inst, Imm))
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Add:
    Takes12BitImm = true;
    break;
  case Instruction::Or:
  case Instruction::Xor:
    // bseti/binvi
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Mul:
    // A power of 2 is a shift; a negated power of 2 is a shift and a negate.
    if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
      return TTI::TCC_Free;
    // One more or less than a power of 2 can use SLLI + ADD/SUB.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;
    break;
  default:
    break;
  }
  if (Takes12BitImm) {
    // Check that the immediate is the correct argument operand.
    if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
      // The immediate is free if it fits a single addi-style instruction.
      if (Imm.getSignificantBits() <= 64 &&
          getTLI()->isLegalAddImmediate(Imm.getSExtValue()))
        return TTI::TCC_Free;
    }
    // Otherwise, use the full materialisation cost.
    return getIntImmCost(Imm, Ty, CostKind);
  }

  // By default, prevent hoisting.
  return TTI::TCC_Free;
}
  return ST->hasStdExtZbb() || ST->hasVendorXCVbitmanip()
             ? TTI::PSK_FastHardware
             : TTI::PSK_Software;
  // These reductions have no equivalent in RVV, so request expansion.
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:
    return true;
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
                                        LLVMContext &C) {
  assert((DataVT.getScalarSizeInBits() != 8 ||
          DataVT.getVectorNumElements() <= 256) &&
         "unhandled case in lowering");
  MVT IndexVT = DataVT.changeTypeToInteger();
  // Widen the index element type if the natural integer type is too narrow.
  if (IndexVT.getScalarType().bitsGT(ST.getXLenVT()))
    IndexVT = IndexVT.changeVectorElementType(MVT::i16);
  return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
}
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                             VectorType *Tp, ArrayRef<int> Mask,
                                             TTI::TargetCostKind CostKind,
                                             int Index, VectorType *SubTp,
                                             ArrayRef<const Value *> Args,
                                             const Instruction *CxtI) {
  Kind = improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp);
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

  // First, handle cases where a fixed length vector lets us give a more
  // accurate cost than falling back to generic scalable codegen.
  if (isa<FixedVectorType>(Tp)) {
    switch (Kind) {
    default:
      break;
    case TTI::SK_PermuteSingleSrc: {
      if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
        MVT EltTp = LT.second.getVectorElementType();
        // A deinterleave of two vectors with a small element type lowers to a
        // single narrowing shift, e.g.:  vnsrl.wi  v10, v8, 0
        if (EltTp.getScalarSizeInBits() < ST->getELen() &&
            (Mask[0] == 0 || Mask[0] == 1)) {
          auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
          if (equal(DeinterleaveMask, Mask))
            return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
                                                      LT.second, CostKind);
        }
      }
      // An unknown single-source shuffle is one vrgather.vv plus the cost of
      // materialising the index vector from the constant pool.
      if (LT.second.isFixedLengthVector() && LT.first == 1 &&
          (LT.second.getScalarSizeInBits() != 8 ||
           LT.second.getVectorNumElements() <= 256)) {
        VectorType *IdxTy =
            getVRGatherIndexType(LT.second, *ST, Tp->getContext());
        InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
        return IndexCost +
               getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second, CostKind);
      }
      break;
    }
    case TTI::SK_PermuteTwoSrc: {
      // Two vrgathers, two index constants, and a mask constant used to
      // merge the two halves.
      if (LT.second.isFixedLengthVector() && LT.first == 1 &&
          (LT.second.getScalarSizeInBits() != 8 ||
           LT.second.getVectorNumElements() <= 256)) {
        auto &C = Tp->getContext();
        auto EC = Tp->getElementCount();
        VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
        VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
        InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
        InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
        return 2 * IndexCost +
               getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
                                       LT.second, CostKind) +
               MaskCost;
      }
      break;
    }
    }
  }
  // Handle fixed-length vectors that span multiple registers: cost each
  // register-sized sub-shuffle independently.
  if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
      LT.second.isFixedLengthVector() &&
      LT.second.getVectorElementType().getSizeInBits() ==
          Tp->getElementType()->getPrimitiveSizeInBits() &&
      LT.second.getVectorNumElements() <
          cast<FixedVectorType>(Tp)->getNumElements() &&
      divideCeil(Mask.size(),
                 cast<FixedVectorType>(Tp)->getNumElements()) ==
          static_cast<unsigned>(*LT.first.getValue())) {
    unsigned NumRegs = *LT.first.getValue();
    unsigned VF = cast<FixedVectorType>(Tp)->getNumElements();
    unsigned SubVF = PowerOf2Ceil(VF / NumRegs);
    auto *SubVecTy = FixedVectorType::get(Tp->getElementType(), SubVF);

    InstructionCost Cost = 0;
    for (unsigned I = 0; I < NumRegs; ++I) {
      bool IsSingleVector = true;
      SmallVector<int> SubMask(SubVF, PoisonMaskElem);
      transform(Mask.slice(I * SubVF,
                           I == NumRegs - 1 ? Mask.size() % SubVF : SubVF),
                SubMask.begin(), [&](int I) {
                  bool SingleSubVector = I / VF == 0;
                  IsSingleVector &= SingleSubVector;
                  return (SingleSubVector ? 0 : 1) * SubVF + I % VF;
                });
      Cost += getShuffleCost(IsSingleVector ? TTI::SK_PermuteSingleSrc
                                            : TTI::SK_PermuteTwoSrc,
                             SubVecTy, SubMask, CostKind, 0, nullptr);
    }
    return Cost;
  }
  switch (Kind) {
  default:
    // Fall through to generic handling.
    break;
  case TTI::SK_ExtractSubvector:
    // With an exactly-known VLEN, an extract at a register-aligned offset is
    // a subregister extract and needs no vslidedown.
    if (std::pair<InstructionCost, MVT> SubLT = getTypeLegalizationCost(SubTp);
        SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
      const unsigned MinVLen = ST->getRealMinVLen();
      const unsigned MaxVLen = ST->getRealMaxVLen();
      if (MinVLen == MaxVLen &&
          SubLT.second.getScalarSizeInBits() * Index % MinVLen == 0 &&
          SubLT.second.getSizeInBits() <= MinVLen)
        return TTI::TCC_Free;
    }
    // Otherwise, one vslidedown.vi:
    return getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second, CostKind);
  case TTI::SK_InsertSubvector:
    // One vslideup.vi:
    return getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second, CostKind);
  case TTI::SK_Select: {
    // One vmerge.vvm plus the (small constant) cost of materialising the
    // selection mask, e.g.:
    //   li          a0, 90
    //   vmv.s.x     v0, a0
    //   vmerge.vvm  v8, v9, v8, v0
    return LT.first *
           (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
                                        LT.second, CostKind));
  }
  case TTI::SK_Broadcast: {
    bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
                                           Instruction::InsertElement);
    if (LT.second.getScalarSizeInBits() == 1) {
      if (HasScalar)
        // Splat the scalar, then regenerate the mask with vmsne.vi.
        return LT.first *
               (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
                                            LT.second, CostKind));
      // Expand the mask, extract element 0, splat it, and compare:
      return LT.first *
             (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
                                           RISCV::VMV_X_S, RISCV::VMV_V_X,
                                           RISCV::VMSNE_VI},
                                          LT.second, CostKind));
    }
    if (HasScalar)
      // A single vmv.v.x.
      return LT.first *
             getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
    // A single vrgather.vi of element 0.
    return LT.first *
           getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second, CostKind);
  }
  case TTI::SK_Splice: {
    // A splice is a vslidedown followed by a vslideup; small offsets can use
    // the immediate forms.
    unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
    if (Index >= 0 && Index < 32)
      Opcodes[0] = RISCV::VSLIDEDOWN_VI;
    else if (Index < 0 && Index > -32)
      Opcodes[1] = RISCV::VSLIDEUP_VI;
    return LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  }
  case TTI::SK_Reverse: {
    // At low LMUL, most of the cost is producing the vrgather index register;
    // at high LMUL the vrgather itself dominates.
    InstructionCost LenCost = 3;
    if (LT.second.isFixedLengthVector())
      // vrsub.vi has a 5-bit immediate field; otherwise an li suffices.
      LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
    unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
    if (LT.second.isFixedLengthVector() &&
        isInt<5>(LT.second.getVectorNumElements() - 1))
      Opcodes[1] = RISCV::VRSUB_VI;
    InstructionCost GatherCost =
        getRISCVInstructionCost(Opcodes, LT.second, CostKind);
    // An i1 vector additionally needs an extend and a truncate.
    InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
    return LT.first * (LenCost + GatherCost + ExtendCost);
  }
  }
  return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}
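// Worked example (illustrative): reversing a legal <16 x i32> vector uses
// vid.v + vrsub.vi + vrgather.vv; since 15 fits vrsub.vi's 5-bit immediate,
// LenCost is 0 and the total is LT.first * GatherCost. Larger or scalable
// types pay LenCost 1 or 3 to materialise the length, and the vrgather term
// grows with LMUL.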
InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (isa<ScalableVectorType>(VecTy) && Factor != 2)
    return InstructionCost::getInvalid();

  // The interleaved-access pass lowers interleaved memory ops (a load/store
  // plus a specific shuffle) to vlseg/vsseg intrinsics, in which case they
  // can be costed as a single legal memory op.
  if (!UseMaskForCond && !UseMaskForGaps &&
      Factor <= TLI->getMaxSupportedInterleaveFactor()) {
    auto *VTy = cast<VectorType>(VecTy);
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VTy);
    // Make sure the type hasn't been scalarized.
    if (LT.second.isVector()) {
      auto *LegalVTy = VectorType::get(VTy->getElementType(),
                                       LT.second.getVectorElementCount());
      // ...
      InstructionCost LegalMemCost = getMemoryOpCost(
          Opcode, LegalVTy, Alignment, AddressSpace, CostKind);
      return LT.first + LegalMemCost;
    }
  }
  // Otherwise, cost the wide memory op plus the demanded shuffles.
  if (isa<ScalableVectorType>(VecTy))
    return InstructionCost::getInvalid();

  auto *FVTy = cast<FixedVectorType>(VecTy);
  InstructionCost MemCost =
      getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
  unsigned VF = FVTy->getNumElements() / Factor;

  // An interleaved load is the wide load plus one deinterleave (stride)
  // shuffle per requested index.
  if (Opcode == Instruction::Load) {
    InstructionCost Cost = MemCost;
    for (unsigned Index : Indices) {
      // ...
    }
    return Cost;
  }

  if (Factor != 2)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  assert(Opcode == Instruction::Store && "Opcode must be a store");
  // A factor-2 interleaved store is one interleave shuffle into a wide store.
  auto Mask = createInterleaveMask(VF, Factor);
  InstructionCost ShuffleCost =
      getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, FVTy, Mask,
                     CostKind, 0, nullptr, {});
  return MemCost + ShuffleCost;
}
InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  if ((Opcode == Instruction::Load &&
       !isLegalMaskedGather(DataTy, Align(Alignment))) ||
      (Opcode == Instruction::Store &&
       !isLegalMaskedScatter(DataTy, Align(Alignment))))
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  // Cost is proportional to the number of per-element memory ops implied;
  // for scalable vectors use an estimated VL.
  auto &VTy = *cast<VectorType>(DataTy);
  InstructionCost MemOpCost =
      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
}
InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  if (((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
       !isLegalStridedLoadStore(DataTy, Alignment)) ||
      (Opcode != Instruction::Load && Opcode != Instruction::Store))
    return BaseT::getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  // As above: proportional to the number of memory ops implied, using an
  // estimated VL for scalable vectors.
  auto &VTy = *cast<VectorType>(DataTy);
  InstructionCost MemOpCost =
      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
}
static const CostTblEntry VectorIntrinsicCostTable[]{
    {Intrinsic::floor, MVT::f32, 9},
    {Intrinsic::floor, MVT::f64, 9},
    {Intrinsic::ceil, MVT::f32, 9},
    {Intrinsic::ceil, MVT::f64, 9},
    {Intrinsic::trunc, MVT::f32, 7},
    {Intrinsic::trunc, MVT::f64, 7},
    {Intrinsic::round, MVT::f32, 9},
    {Intrinsic::round, MVT::f64, 9},
    {Intrinsic::roundeven, MVT::f32, 9},
    {Intrinsic::roundeven, MVT::f64, 9},
    {Intrinsic::rint, MVT::f32, 7},
    {Intrinsic::rint, MVT::f64, 7},
    {Intrinsic::lrint, MVT::i32, 1},
    {Intrinsic::lrint, MVT::i64, 1},
    {Intrinsic::llrint, MVT::i64, 1},
    {Intrinsic::nearbyint, MVT::f32, 9},
    {Intrinsic::nearbyint, MVT::f64, 9},
    {Intrinsic::bswap, MVT::i16, 3},
    {Intrinsic::bswap, MVT::i32, 12},
    {Intrinsic::bswap, MVT::i64, 31},
    {Intrinsic::vp_bswap, MVT::i16, 3},
    {Intrinsic::vp_bswap, MVT::i32, 12},
    {Intrinsic::vp_bswap, MVT::i64, 31},
    {Intrinsic::vp_fshl, MVT::i8, 7},
    {Intrinsic::vp_fshl, MVT::i16, 7},
    {Intrinsic::vp_fshl, MVT::i32, 7},
    {Intrinsic::vp_fshl, MVT::i64, 7},
    {Intrinsic::vp_fshr, MVT::i8, 7},
    {Intrinsic::vp_fshr, MVT::i16, 7},
    {Intrinsic::vp_fshr, MVT::i32, 7},
    {Intrinsic::vp_fshr, MVT::i64, 7},
    {Intrinsic::bitreverse, MVT::i8, 17},
    {Intrinsic::bitreverse, MVT::i16, 24},
    {Intrinsic::bitreverse, MVT::i32, 33},
    {Intrinsic::bitreverse, MVT::i64, 52},
    {Intrinsic::vp_bitreverse, MVT::i8, 17},
    {Intrinsic::vp_bitreverse, MVT::i16, 24},
    {Intrinsic::vp_bitreverse, MVT::i32, 33},
    {Intrinsic::vp_bitreverse, MVT::i64, 52},
    {Intrinsic::ctpop, MVT::i8, 12},
    {Intrinsic::ctpop, MVT::i16, 19},
    {Intrinsic::ctpop, MVT::i32, 20},
    {Intrinsic::ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctpop, MVT::i8, 12},
    {Intrinsic::vp_ctpop, MVT::i16, 19},
    {Intrinsic::vp_ctpop, MVT::i32, 20},
    {Intrinsic::vp_ctpop, MVT::i64, 21},
    {Intrinsic::vp_ctlz, MVT::i8, 19},
    {Intrinsic::vp_ctlz, MVT::i16, 28},
    {Intrinsic::vp_ctlz, MVT::i32, 31},
    {Intrinsic::vp_ctlz, MVT::i64, 35},
    {Intrinsic::vp_cttz, MVT::i8, 16},
    {Intrinsic::vp_cttz, MVT::i16, 23},
    {Intrinsic::vp_cttz, MVT::i32, 24},
    {Intrinsic::vp_cttz, MVT::i64, 25},
};
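// The table is consulted through CostTableLookup after type legalization
// (see the hasVInstructions() guard near the end of getIntrinsicInstrCost);
// a minimal sketch of that pattern, with names matching this file:
//   if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
//                                           ICA.getID(), EltTy))
//     return LT.first * Entry->Cost;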
static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) {
  switch (ID) {
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
  case Intrinsic::VPID:                                                        \
    return ISD::VPSD;
#include "llvm/IR/VPIntrinsics.def"
#undef HELPER_MAP_VPID_TO_VPSD
  default:
    break;
  }
  return ISD::DELETED_NODE;
}
InstructionCost
RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                    TTI::TargetCostKind CostKind) {
  auto *RetTy = ICA.getReturnType();
  switch (ICA.getID()) {
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::lrint:
  case Intrinsic::llrint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
    // These all use the same code.
    auto LT = getTypeLegalizationCost(RetTy);
    if (!LT.second.isVector() && TLI->isOperationCustom(ISD::FCEIL, LT.second))
      return LT.first * 8;
    break;
  }
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    auto LT = getTypeLegalizationCost(RetTy);
    if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
      return LT.first;
    if (ST->hasVInstructions() && LT.second.isVector()) {
      unsigned Op;
      switch (ICA.getID()) {
      case Intrinsic::umin:
        Op = RISCV::VMINU_VV;
        break;
      case Intrinsic::umax:
        Op = RISCV::VMAXU_VV;
        break;
      case Intrinsic::smin:
        Op = RISCV::VMIN_VV;
        break;
      case Intrinsic::smax:
        Op = RISCV::VMAX_VV;
        break;
      }
      return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
    }
    break;
  }
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat:
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    auto LT = getTypeLegalizationCost(RetTy);
    if (ST->hasVInstructions() && LT.second.isVector())
      return LT.first;
    break;
  }
  case Intrinsic::ctpop: {
    auto LT = getTypeLegalizationCost(RetTy);
    if (ST->hasVInstructions() && ST->hasStdExtZvbb() && LT.second.isVector())
      return LT.first;
    break;
  }
  case Intrinsic::abs: {
    auto LT = getTypeLegalizationCost(RetTy);
    if (ST->hasVInstructions() && LT.second.isVector()) {
      // vrsub.vi v10, v8, 0 ; vmax.vv v8, v8, v10
      return LT.first * 2;
    }
    break;
  }
  case Intrinsic::get_active_lane_mask: {
    if (ST->hasVInstructions()) {
      Type *ExpRetTy = VectorType::get(
          ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
      auto LT = getTypeLegalizationCost(ExpRetTy);
      // vid.v (considered hoisted) ; vsaddu.vx ; vmsltu.vx
      return LT.first *
             getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
                                     LT.second, CostKind);
    }
    break;
  }
  case Intrinsic::experimental_stepvector: {
    auto LT = getTypeLegalizationCost(RetTy);
    // Legalisation of illegal types involves an `index` instruction plus
    // (LT.first - 1) vector adds.
    if (ST->hasVInstructions())
      return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) +
             (LT.first - 1) *
                 getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind);
    return 1 + (LT.first - 1);
  }
  case Intrinsic::experimental_cttz_elts: {
    Type *ArgTy = ICA.getArgTypes()[0];
    EVT ArgType = TLI->getValueType(DL, ArgTy, true);
    if (getTLI()->shouldExpandCttzElements(ArgType))
      break;
    InstructionCost Cost = getRISCVInstructionCost(
        RISCV::VFIRST_M, getTypeLegalizationCost(ArgTy).second, CostKind);
    // When zero_is_poison is false, additional cmp + select instructions
    // convert vfirst.m's -1 "not found" result to EVL.
    Type *BoolTy = Type::getInt1Ty(RetTy->getContext());
    if (ICA.getArgs().size() > 1 &&
        cast<ConstantInt>(ICA.getArgs()[1])->isZero())
      Cost += getCmpSelInstrCost(Instruction::ICmp, BoolTy, RetTy,
                                 CmpInst::ICMP_SLT, CostKind) +
              getCmpSelInstrCost(Instruction::Select, RetTy, BoolTy,
                                 CmpInst::BAD_ICMP_PREDICATE, CostKind);
    return Cost;
  }
  case Intrinsic::vp_rint: {
    // The RISC-V target uses at least 5 instructions to lower rounding
    // intrinsics.
    unsigned Cost = 5;
    auto LT = getTypeLegalizationCost(RetTy);
    if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
      return Cost * LT.first;
    break;
  }
  case Intrinsic::vp_nearbyint: {
    // One extra read and one extra write of fflags compared to vp_rint.
    unsigned Cost = 7;
    auto LT = getTypeLegalizationCost(RetTy);
    if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
      return Cost * LT.first;
    break;
  }
  case Intrinsic::vp_ceil:
  case Intrinsic::vp_floor:
  case Intrinsic::vp_round:
  case Intrinsic::vp_roundeven:
  case Intrinsic::vp_roundtozero: {
    // Rounding with a static rounding mode needs two more instructions than
    // vp_rint to swap and restore FRM.
    unsigned Cost = 7;
    auto LT = getTypeLegalizationCost(RetTy);
    unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
    if (TLI->isOperationCustom(VPISD, LT.second))
      return Cost * LT.first;
    break;
  }
  }

  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
    if (auto LT = getTypeLegalizationCost(RetTy); LT.second.isVector()) {
      MVT EltTy = LT.second.getVectorElementType();
      if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
                                              ICA.getID(), EltTy))
        return LT.first * Entry->Cost;
    }
  }

  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                               Type *Src,
                                               TTI::CastContextHint CCH,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  bool IsVectorType = isa<VectorType>(Dst) && isa<VectorType>(Src);
  if (!IsVectorType)
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

  bool IsTypeLegal = isTypeLegal(Src) && isTypeLegal(Dst) &&
                     (Src->getScalarSizeInBits() <= ST->getELen()) &&
                     (Dst->getScalarSizeInBits() <= ST->getELen());
  if (!IsTypeLegal)
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

  std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
  std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");
  int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
                (int)Log2_32(Src->getScalarSizeInBits());
  switch (ISD) {
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    const unsigned SrcEltSize = Src->getScalarSizeInBits();
    if (SrcEltSize == 1) {
      // Extending from a mask vector uses vmv.v.i + vmerge.vim rather than
      // vsext/vzext.
      return getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM},
                                     DstLT.second, CostKind);
    }
    if ((PowDiff < 1) || (PowDiff > 3))
      return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
    unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
    unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
    unsigned Op =
        (ISD == ISD::SIGN_EXTEND) ? SExtOp[PowDiff - 1] : ZExtOp[PowDiff - 1];
    return getRISCVInstructionCost(Op, DstLT.second, CostKind);
  }
  case ISD::TRUNCATE:
    if (Dst->getScalarSizeInBits() == 1) {
      // Truncating to a mask vector uses vand.vi + vmsne.vi instead of a
      // chain of narrowing conversions.
      return getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
                                     SrcLT.second, CostKind);
    }
    [[fallthrough]];
  case ISD::FP_EXTEND:
  case ISD::FP_ROUND: {
    // Count the narrowing/widening instructions needed to bridge the sizes.
    unsigned SrcEltSize = Src->getScalarSizeInBits();
    unsigned DstEltSize = Dst->getScalarSizeInBits();
    unsigned Op = (ISD == ISD::TRUNCATE)    ? RISCV::VNSRL_WI
                  : (ISD == ISD::FP_EXTEND) ? RISCV::VFWCVT_F_F_V
                                            : RISCV::VFNCVT_F_F_W;
    InstructionCost Cost = 0;
    for (; SrcEltSize != DstEltSize;) {
      MVT ElementMVT = (ISD == ISD::TRUNCATE)
                           ? MVT::getIntegerVT(DstEltSize)
                           : MVT::getFloatingPointVT(DstEltSize);
      MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
      DstEltSize =
          (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
      Cost += getRISCVInstructionCost(Op, DstMVT, CostKind);
    }
    return Cost;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    if (Src->getScalarSizeInBits() == 1 || Dst->getScalarSizeInBits() == 1) {
      // Converting from or to a mask vector takes a fixed three-instruction
      // sequence (e.g. vmv.v.i + vmerge.vim + vfcvt.f.x.v), so PowDiff does
      // not apply.
      return 3;
    }
    if (std::abs(PowDiff) <= 1)
      return 1;
    // The backend can lower an int-to-double widening through the wider
    // integer type, so only two conversions are needed.
    if (Src->isIntOrIntVectorTy())
      return 2;
    // Counts of narrow/widen instructions.
    return std::abs(PowDiff);
  }
  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
  if (isa<ScalableVectorType>(Ty)) {
    const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
    const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
    const unsigned VectorBits = *getVScaleForTuning() * RISCV::RVVBitsPerBlock;
    return RISCVTargetLowering::computeVLMAX(VectorBits, EltSize, MinSize);
  }
  return cast<FixedVectorType>(Ty)->getNumElements();
}
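// Worked example (illustrative): for <vscale x 4 x i32> with
// getVScaleForTuning() == 2, the vector is assumed to hold 2 * 4 = 8 lanes,
// so a gather over it is costed as 8 times the per-element memory op cost in
// getGatherScatterOpCost above; a fixed <16 x i32> simply reports 16.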
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  if (Ty->getElementType()->isIntegerTy(1)) {
    // SelectionDAGBuilder turns {smin,umax} of i1 into an OR reduction and
    // {smax,umin} into an AND reduction.
    if (IID == Intrinsic::umax || IID == Intrinsic::smin)
      return getArithmeticReductionCost(Instruction::Or, Ty, FMF, CostKind);
    return getArithmeticReductionCost(Instruction::And, Ty, FMF, CostKind);
  }

  if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
    SmallVector<unsigned, 3> Opcodes;
    InstructionCost ExtraCost = 0;
    switch (IID) {
    case Intrinsic::maximum:
      if (FMF.noNaNs()) {
        Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
      } else {
        // Without nnan, additionally test for NaNs and pay for materialising
        // a canonical NaN plus a branch.
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
                   RISCV::VFMV_F_S};
        // ...
      }
      break;
    case Intrinsic::minimum:
      if (FMF.noNaNs()) {
        Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
      } else {
        Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
                   RISCV::VFMV_F_S};
        // ...
      }
      break;
    }
    return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  }
  // An IR reduction is composed of two vmv moves and one RVV reduction
  // instruction, plus one split op per extra legalized part.
  unsigned SplitOp;
  SmallVector<unsigned, 3> Opcodes;
  switch (IID) {
  default:
    llvm_unreachable("Unsupported intrinsic");
  case Intrinsic::smax:
    SplitOp = RISCV::VMAX_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAX_VS, RISCV::VMV_X_S};
    break;
  case Intrinsic::smin:
    SplitOp = RISCV::VMIN_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMIN_VS, RISCV::VMV_X_S};
    break;
  case Intrinsic::umax:
    SplitOp = RISCV::VMAXU_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
    break;
  case Intrinsic::umin:
    SplitOp = RISCV::VMINU_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDMINU_VS, RISCV::VMV_X_S};
    break;
  case Intrinsic::maxnum:
    SplitOp = RISCV::VFMAX_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
    break;
  case Intrinsic::minnum:
    SplitOp = RISCV::VFMIN_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
    break;
  }
  InstructionCost SplitCost =
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
                     : 0;
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
InstructionCost
RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                         std::optional<FastMathFlags> FMF,
                                         TTI::TargetCostKind CostKind) {
  if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  // Skip if the scalar size of Ty is bigger than ELEN.
  if (Ty->getScalarSizeInBits() > ST->getELen())
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
  SmallVector<unsigned, 3> Opcodes;
  Type *ElementTy = Ty->getElementType();
  if (ElementTy->isIntegerTy(1)) {
    if (ISD == ISD::AND) {
      // Example: vmnand.mm v8, v0, v0 ; vcpop.m a0, v8 ; seqz a0, a0
      Opcodes = {RISCV::VMNAND_MM, RISCV::VCPOP_M};
      return (LT.first - 1) +
             getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
             getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
                                CmpInst::ICMP_EQ, CostKind);
    }
    // Example: vcpop.m a0, v0 ; snez a0, a0
    Opcodes = {RISCV::VCPOP_M};
    return (LT.first - 1) +
           getRISCVInstructionCost(Opcodes, LT.second, CostKind) +
           getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
                              CmpInst::ICMP_NE, CostKind);
  }

  // An ordered FP reduction keeps one serial vfredosum per legalized part.
  if (TTI::requiresOrderedReduction(FMF)) {
    Opcodes.push_back(RISCV::VFMV_S_F);
    for (unsigned i = 0; i < LT.first.getValue(); i++)
      Opcodes.push_back(RISCV::VFREDOSUM_VS);
    Opcodes.push_back(RISCV::VFMV_F_S);
    return getRISCVInstructionCost(Opcodes, LT.second, CostKind);
  }
  // An IR reduction is composed of two vmv moves and one RVV reduction
  // instruction, plus one split op per extra legalized part.
  unsigned SplitOp;
  switch (ISD) {
  case ISD::ADD:
    SplitOp = RISCV::VADD_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
    break;
  case ISD::OR:
    SplitOp = RISCV::VOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDOR_VS, RISCV::VMV_X_S};
    break;
  case ISD::XOR:
    SplitOp = RISCV::VXOR_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
    break;
  case ISD::AND:
    SplitOp = RISCV::VAND_VV;
    Opcodes = {RISCV::VMV_S_X, RISCV::VREDAND_VS, RISCV::VMV_X_S};
    break;
  case ISD::FADD:
    SplitOp = RISCV::VFADD_VV;
    Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
    break;
  }
  InstructionCost SplitCost =
      (LT.first > 1) ? (LT.first - 1) *
                           getRISCVInstructionCost(SplitOp, LT.second, CostKind)
                     : 0;
  return SplitCost + getRISCVInstructionCost(Opcodes, LT.second, CostKind);
}
InstructionCost RISCVTTIImpl::getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
    FastMathFlags FMF, TTI::TargetCostKind CostKind) {
  // ...
  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
    return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
                                           FMF, CostKind);
  // ...
  return (LT.first - 1) +
         getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
}
InstructionCost RISCVTTIImpl::getStoreImmCost(Type *Ty,
                                              TTI::OperandValueInfo OpInfo,
                                              TTI::TargetCostKind CostKind) {
  if (!isa<VectorType>(Ty))
    // Scalar immediate materialization is currently ignored here.
    return 0;
  if (OpInfo.isUniform())
    // One vmv.v.i / vmv.v.x / vfmv.v.f.
    return 1;
  return getConstantPoolLoadCost(Ty, CostKind);
}

InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                              MaybeAlign Alignment,
                                              unsigned AddressSpace,
                                              TTI::TargetCostKind CostKind,
                                              TTI::OperandValueInfo OpInfo,
                                              const Instruction *I) {
  EVT VT = TLI->getValueType(DL, Src, true);
  // Type legalization can't handle structs.
  if (VT == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, OpInfo, I);

  InstructionCost Cost = 0;
  if (Opcode == Instruction::Store && OpInfo.isConstant())
    Cost += getStoreImmCost(Src, OpInfo, CostKind);
  InstructionCost BaseCost = BaseT::getMemoryOpCost(
      Opcode, Src, Alignment, AddressSpace, CostKind, OpInfo, I);
  // Memory op cost scales with the number of vector registers touched;
  // BasicTTI already handles the LT.first term.
  if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
      LT.second.isVector() && CostKind != TTI::TCK_CodeSize)
    BaseCost *= TLI->getLMULCost(LT.second);
  return Cost + BaseCost;
}
  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    if (CondTy->isVectorTy()) {
      if (ValTy->getScalarSizeInBits() == 1) {
        // vmandn.mm v8, v8, v9 ; vmand.mm v9, v0, v9 ; vmor.mm v0, v9, v8
        return LT.first *
               getRISCVInstructionCost(
                   {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
                   LT.second, CostKind);
      }
      // vselect is supported natively: one vmerge.vvm.
      return LT.first *
             getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second, CostKind);
    }
    if (ValTy->getScalarSizeInBits() == 1) {
      // Splat the scalar condition, compare against zero, then merge masks.
      MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
      return LT.first *
                 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
                                         InterimVT, CostKind) +
             LT.first *
                 getRISCVInstructionCost(
                     {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
                     LT.second, CostKind);
    }
    // vmv.v.x ; vmsne.vi ; vmerge.vvm
    return LT.first * getRISCVInstructionCost(
                          {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
                          LT.second, CostKind);
  }

  if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
      CmpInst::isIntPredicate(VecPred)) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    // Integer compares lower to a single vmslt.vv (or similar) instruction.
    return LT.first *
           getRISCVInstructionCost(RISCV::VMSLT_VV, LT.second, CostKind);
  }

  if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
      CmpInst::isFPPredicate(VecPred)) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    // All-false/all-true masks come straight from vmxor.mm/vmxnor.mm.
    if ((VecPred == CmpInst::FCMP_FALSE) || (VecPred == CmpInst::FCMP_TRUE))
      return getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second, CostKind);
    // ...
    switch (VecPred) {
    case CmpInst::FCMP_ONE: // vmflt.vv + vmflt.vv + vmor.mm
      return LT.first * getRISCVInstructionCost(
                            {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
                            LT.second, CostKind);
    case CmpInst::FCMP_UGT: // one compare plus a mask inversion
    case CmpInst::FCMP_UGE:
    case CmpInst::FCMP_ULT:
    case CmpInst::FCMP_ULE:
      return LT.first *
             getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
                                     LT.second, CostKind);
    default:
      // Natively supported ordered compares: one vmflt.vv-class instruction.
      return LT.first *
             getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second, CostKind);
    }
  }
  return Opcode == Instruction::PHI ? 0 : 1;
InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                 TTI::TargetCostKind CostKind,
                                                 unsigned Index, Value *Op0,
                                                 Value *Op1) {
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)
    return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);

  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Val);

  // The type was legalized to a scalar type: estimate a stack round-trip.
  if (!LT.second.isVector()) {
    auto *FixedVecTy = cast<FixedVectorType>(Val);
    // If Index is a known constant, the cost is zero.
    if (Index != -1U)
      return 0;
    // Extract: store the vector, load one scalar back.
    // Insert: store the vector, store the scalar, reload the vector.
    Type *ElemTy = FixedVecTy->getElementType();
    auto NumElems = FixedVecTy->getNumElements();
    auto PrefAlign = DL.getPrefTypeAlign(ElemTy);
    InstructionCost LoadCost =
        getMemoryOpCost(Instruction::Load, ElemTy, PrefAlign, 0, CostKind);
    InstructionCost StoreCost =
        getMemoryOpCost(Instruction::Store, ElemTy, PrefAlign, 0, CostKind);
    return Opcode == Instruction::ExtractElement
               ? StoreCost * NumElems + LoadCost
               : (StoreCost + LoadCost) * NumElems + StoreCost;
  }

  // For an unsupported scalable vector, give up.
  if (LT.second.isScalableVector() && !LT.first.isValid())
    return LT.first;

  // Mask vectors are widened to i8 before the element access.
  if (Val->getScalarSizeInBits() == 1) {
    VectorType *WideTy =
        VectorType::get(IntegerType::get(Val->getContext(), 8),
                        cast<VectorType>(Val)->getElementCount());
    if (Opcode == Instruction::ExtractElement) {
      InstructionCost ExtendCost =
          getCastInstrCost(Instruction::ZExt, WideTy, Val,
                           TTI::CastContextHint::None, CostKind);
      InstructionCost ExtractCost =
          getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
      return ExtendCost + ExtractCost;
    }
    InstructionCost ExtendCost =
        getCastInstrCost(Instruction::ZExt, WideTy, Val,
                         TTI::CastContextHint::None, CostKind);
    InstructionCost InsertCost =
        getVectorInstrCost(Opcode, WideTy, CostKind, Index, nullptr, nullptr);
    InstructionCost TruncCost =
        getCastInstrCost(Instruction::Trunc, Val, WideTy,
                         TTI::CastContextHint::None, CostKind);
    return ExtendCost + InsertCost + TruncCost;
  }

  // In RVV, extracts are vslidedown + vmv.x.s and inserts are
  // vslideup + vmv.s.x.
  unsigned BaseCost = 1;
  // When inserting, the index must additionally be incremented for vslideup.
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;

  if (Index != -1U) {
    // The type may be split; normalize the index into the legalized part.
    if (LT.second.isFixedLengthVector()) {
      unsigned Width = LT.second.getVectorNumElements();
      Index = Index % Width;
    }
    // Element 0 needs no slide at all.
    if (Index == 0)
      SlideCost = 0;
    else if (Opcode == Instruction::InsertElement)
      SlideCost = 1; // With a constant index no addi is required.
  }

  // Accessing an i64 element when XLEN is 32 takes extra instructions.
  if (Val->getScalarType()->isIntegerTy() &&
      ST->getXLen() < Val->getScalarSizeInBits()) {
    // ...
    BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
  }
  return BaseCost + SlideCost;
}
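// Worked example (illustrative): extracting element 5 of a legal <8 x i32>
// vector is vslidedown.vi + vmv.x.s, i.e. BaseCost 1 + SlideCost 1 = 2;
// extracting element 0 skips the slide entirely and costs just 1.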
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

  // TODO: Handle scalar types.
  if (!LT.second.isVector())
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);

  auto getConstantMatCost =
      [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
    if (OpInfo.isUniform() && TLI->canSplatOperand(Opcode, Operand))
      // A uniform constant is either splatted from a 5-bit immediate or
      // materialized in a scalar register; both are treated as free here.
      return 0;
    return getConstantPoolLoadCost(Ty, CostKind);
  };

  // Add the cost of materializing any constant vectors required.
  InstructionCost ConstantMatCost = 0;
  if (Op1Info.isConstant())
    ConstantMatCost += getConstantMatCost(0, Op1Info);
  if (Op2Info.isConstant())
    ConstantMatCost += getConstantMatCost(1, Op2Info);

  unsigned Op;
  switch (TLI->InstructionOpcodeToISD(Opcode)) {
  case ISD::ADD:
  case ISD::SUB:
    Op = RISCV::VADD_VV;
    break;
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
    Op = RISCV::VSLL_VV;
    break;
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
    break;
  case ISD::MUL:
  case ISD::MULHS:
  case ISD::MULHU:
    Op = RISCV::VMUL_VV;
    break;
  case ISD::SDIV:
  case ISD::UDIV:
    Op = RISCV::VDIV_VV;
    break;
  case ISD::SREM:
  case ISD::UREM:
    Op = RISCV::VREM_VV;
    break;
  case ISD::FADD:
  case ISD::FSUB:
    Op = RISCV::VFADD_VV;
    break;
  case ISD::FMUL:
    Op = RISCV::VFMUL_VV;
    break;
  case ISD::FDIV:
    Op = RISCV::VFDIV_VV;
    break;
  case ISD::FNEG:
    Op = RISCV::VFSGNJN_VV;
    break;
  default:
    // Assume all other operations cost the same as the base implementation.
    return ConstantMatCost +
           BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);
  }
  return ConstantMatCost +
         LT.first * getRISCVInstructionCost(Op, LT.second, CostKind);
}
InstructionCost RISCVTTIImpl::getPointersChainCost(
    ArrayRef<const Value *> Ptrs, const Value *Base,
    const TTI::PointersChainInfo &Info, Type *AccessTy,
    TTI::TargetCostKind CostKind) {
  InstructionCost Cost = TTI::TCC_Free;
  // Consider only GEP instructions in the chain.
  for (auto [I, V] : enumerate(Ptrs)) {
    const auto *GEP = dyn_cast<GetElementPtrInst>(V);
    if (!GEP)
      continue;
    if (Info.isSameBase() && V != Base) {
      if (GEP->hasAllConstantIndices())
        continue;
      // If the chain is unit-stride and BaseReg + stride*i is a legal
      // addressing mode, presume the base is already in a register and the
      // offset folds into the access.
      unsigned Stride = DL.getTypeStoreSize(AccessTy);
      if (Info.isUnitStride() &&
          isLegalAddressingMode(AccessTy,
                                /* BaseGV */ nullptr,
                                /* BaseOffset */ Stride * I,
                                /* HasBaseReg */ true,
                                /* Scale */ 0,
                                GEP->getType()->getPointerAddressSpace()))
        continue;
      Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(),
                                     CostKind,
                                     {TTI::OK_AnyValue, TTI::OP_None},
                                     {TTI::OK_AnyValue, TTI::OP_None},
                                     std::nullopt);
    } else {
      SmallVector<const Value *> Indices(GEP->indices());
      Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
                         Indices, AccessTy, CostKind);
    }
  }
  return Cost;
}
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  if (ST->enableDefaultUnroll())
    return BaseT::getUnrollingPreferences(L, SE, UP, ORE);

  // Enable upper-bound unrolling universally; disable unrolling at Os/Oz.
  UP.UpperBound = true;
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Only allow one exit other than the latch, mirroring the profitability
  // calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor;
  // allowing 4 blocks permits if-then-else diamonds in the body.
  if (L->getNumBlocks() > 4)
    return;

  // ...
  // Don't unroll loops containing vectorized instructions or calls, since
  // unrolling could otherwise prevent the calls from being inlined.
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (I.getType()->isVectorTy())
        return;
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        // ...
        return;
      }
    }
  }
  // ...
unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
  if (SLPMaxVF.getNumOccurrences())
    return SLPMaxVF;
  // ...
  TypeSize RegWidth =
      getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
  return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
}
bool RISCVTTIImpl::isLegalMaskedCompressStore(Type *DataTy, Align Alignment) {
  auto *VTy = dyn_cast<VectorType>(DataTy);
  if (!VTy || VTy->isScalableTy())
    return false;
  if (!isLegalMaskedLoadStore(DataTy, Alignment))
    return false;
  return true;
}
bool RISCVTTIImpl::areInlineCompatible(const Function *Caller,
                                       const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // Inline a callee if its target features are a subset of the caller's.
  return (CallerBits & CalleeBits) == CalleeBits;
}
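// Illustrative check of the subset rule above: a caller compiled with
// {+m,+a,+v} may inline a {+m,+a} callee, since the callee's bits are a
// subset, but a plain {+m,+a} caller may not inline a {+v} callee, because
// the inlined vector code could then run in a context without RVV.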