51 #define DEBUG_TYPE "x86tti"
120 bool Vector = (ClassID == 1);
135 if (ST->
hasAVX512() && PreferVectorWidth >= 512)
137 if (ST->
hasAVX() && PreferVectorWidth >= 256)
139 if (ST->
hasSSE1() && PreferVectorWidth >= 128)
183 Op2Info, Opd1PropInfo,
184 Opd2PropInfo,
Args, CxtI);
189 assert(ISD &&
"Invalid opcode");
201 return LT.first * Entry->Cost;
230 bool Op1Signed =
false;
232 bool Op2Signed =
false;
235 bool SignedMode = Op1Signed || Op2Signed;
236 unsigned OpMinSize =
std::max(Op1MinSize, Op2MinSize);
240 if (!SignedMode && OpMinSize <= 8)
244 if (!SignedMode && OpMinSize <= 16)
250 return LT.first * Entry->Cost;
303 static const CostTblEntry AVX512BWUniformConstCostTable[] = {
311 if (
const auto *Entry =
CostTableLookup(AVX512BWUniformConstCostTable, ISD,
313 return LT.first * Entry->Cost;
316 static const CostTblEntry AVX512UniformConstCostTable[] = {
333 if (
const auto *Entry =
CostTableLookup(AVX512UniformConstCostTable, ISD,
335 return LT.first * Entry->Cost;
338 static const CostTblEntry AVX2UniformConstCostTable[] = {
353 if (
const auto *Entry =
CostTableLookup(AVX2UniformConstCostTable, ISD,
355 return LT.first * Entry->Cost;
358 static const CostTblEntry SSE2UniformConstCostTable[] = {
380 if (
const auto *Entry =
382 return LT.first * Entry->Cost;
399 if (
const auto *Entry =
401 return LT.first * Entry->Cost;
422 if (
const auto *Entry =
424 return LT.first * Entry->Cost;
446 return LT.first * Entry->Cost;
481 return LT.first * 32;
483 return LT.first * 38;
485 return LT.first * 15;
487 return LT.first * 20;
490 return LT.first * Entry->Cost;
509 return LT.first * Entry->Cost;
524 if (
const auto *Entry =
526 return LT.first * Entry->Cost;
546 if (
const auto *Entry =
548 return LT.first * Entry->Cost;
560 return LT.first * Entry->Cost;
575 return LT.first * Entry->Cost;
608 return LT.first * Entry->Cost;
650 return LT.first * Entry->Cost;
691 if (
const auto *Entry =
693 return LT.first * Entry->Cost;
696 static const CostTblEntry SSE2UniformShiftCostTable[] = {
720 if (
const auto *Entry =
722 return LT.first * Entry->Cost;
786 return LT.first * Entry->Cost;
822 return LT.first * Entry->Cost;
848 return LT.first * Entry->Cost;
877 return LT.first * Entry->Cost;
919 return LT.first * Entry->Cost;
942 return LT.first * Entry->Cost;
955 return 20 *
LT.first *
LT.second.getVectorNumElements() * ScalarCost;
981 int NumElts =
LT.second.getVectorNumElements();
982 if ((
Index % NumElts) == 0)
985 if (SubLT.second.isVector()) {
986 int NumSubElts = SubLT.second.getVectorNumElements();
987 if ((
Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
995 int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements();
996 if (NumSubElts > OrigSubElts && (
Index % OrigSubElts) == 0 &&
997 (NumSubElts % OrigSubElts) == 0 &&
998 LT.second.getVectorElementType() ==
999 SubLT.second.getVectorElementType() &&
1000 LT.second.getVectorElementType().getSizeInBits() ==
1002 assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
1003 "Unexpected number of elements!");
1005 LT.second.getVectorNumElements());
1007 SubLT.second.getVectorNumElements());
1010 ExtractIndex, SubTy);
1015 return ExtractCost + 1;
1018 "Unexpected vector size");
1020 return ExtractCost + 2;
1030 static const CostTblEntry SSE2SubVectorShuffleTbl[] = {
1056 if (
const auto *Entry =
1065 MVT LegalVT =
LT.second;
1070 cast<FixedVectorType>(BaseTp)->getNumElements()) {
1075 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
1077 unsigned NumOfDests =
LT.first;
1082 unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
1083 return NumOfShuffles *
1093 int NumOfDests =
LT.first;
1094 int NumOfShufflesPerDest =
LT.first * 2 - 1;
1095 LT.first = NumOfDests * NumOfShufflesPerDest;
1111 if (
const auto *Entry =
1113 return LT.first * Entry->Cost;
1137 if (
const auto *Entry =
1139 return LT.first * Entry->Cost;
1198 return LT.first * Entry->Cost;
1239 return LT.first * Entry->Cost;
1261 return LT.first * Entry->Cost;
1308 return LT.first * Entry->Cost;
1321 return LT.first * Entry->Cost;
1342 return LT.first * Entry->Cost;
1381 return LT.first * Entry->Cost;
1393 return LT.first * Entry->Cost;
1403 assert(ISD &&
"Invalid opcode");
1406 auto AdjustCost = [&
CostKind](
int Cost) {
1408 return Cost == 0 ? 0 : 1;
2012 LTDest.second, LTSrc.second))
2013 return AdjustCost(LTSrc.first * Entry->Cost);
2029 SimpleDstTy, SimpleSrcTy))
2030 return AdjustCost(Entry->Cost);
2034 SimpleDstTy, SimpleSrcTy))
2035 return AdjustCost(Entry->Cost);
2039 SimpleDstTy, SimpleSrcTy))
2040 return AdjustCost(Entry->Cost);
2045 SimpleDstTy, SimpleSrcTy))
2046 return AdjustCost(Entry->Cost);
2050 SimpleDstTy, SimpleSrcTy))
2051 return AdjustCost(Entry->Cost);
2055 SimpleDstTy, SimpleSrcTy))
2056 return AdjustCost(Entry->Cost);
2060 SimpleDstTy, SimpleSrcTy))
2061 return AdjustCost(Entry->Cost);
2066 SimpleDstTy, SimpleSrcTy))
2067 return AdjustCost(Entry->Cost);
2072 SimpleDstTy, SimpleSrcTy))
2073 return AdjustCost(Entry->Cost);
2078 SimpleDstTy, SimpleSrcTy))
2079 return AdjustCost(Entry->Cost);
2098 MVT MTy =
LT.second;
2101 assert(ISD &&
"Invalid opcode");
2103 unsigned ExtraCost = 0;
2104 if (
I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) {
2111 case CmpInst::Predicate::ICMP_NE:
2115 case CmpInst::Predicate::ICMP_SGE:
2116 case CmpInst::Predicate::ICMP_SLE:
2120 case CmpInst::Predicate::ICMP_ULT:
2121 case CmpInst::Predicate::ICMP_UGT:
2126 case CmpInst::Predicate::ICMP_ULE:
2127 case CmpInst::Predicate::ICMP_UGE:
2243 return LT.first * (ExtraCost + Entry->Cost);
2247 return LT.first * (ExtraCost + Entry->Cost);
2251 return LT.first * (ExtraCost + Entry->Cost);
2255 return LT.first * (ExtraCost + Entry->Cost);
2259 return LT.first * (ExtraCost + Entry->Cost);
2263 return LT.first * (ExtraCost + Entry->Cost);
2267 return LT.first * (ExtraCost + Entry->Cost);
2271 return LT.first * (ExtraCost + Entry->Cost);
2275 return LT.first * (ExtraCost + Entry->Cost);
2716 case Intrinsic::bitreverse:
2719 case Intrinsic::bswap:
2722 case Intrinsic::ctlz:
2725 case Intrinsic::ctpop:
2728 case Intrinsic::cttz:
2736 case Intrinsic::sadd_sat:
2745 case Intrinsic::ssub_sat:
2748 case Intrinsic::uadd_sat:
2757 case Intrinsic::usub_sat:
2760 case Intrinsic::sqrt:
2763 case Intrinsic::sadd_with_overflow:
2764 case Intrinsic::ssub_with_overflow:
2769 case Intrinsic::uadd_with_overflow:
2770 case Intrinsic::usub_with_overflow:
2775 case Intrinsic::umul_with_overflow:
2776 case Intrinsic::smul_with_overflow:
2786 MVT MTy =
LT.second;
2800 Cost = Cost * 2 + 2;
2802 return LT.first * Cost;
2805 auto adjustTableCost = [](
const CostTblEntry &Entry,
int LegalizationCost,
2812 return LegalizationCost * 1;
2814 return LegalizationCost * (
int)Entry.Cost;
2819 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2823 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2827 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2831 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2835 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2839 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2843 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2847 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2851 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2855 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2859 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2863 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2867 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2872 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2875 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2881 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2884 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2890 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2893 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2900 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2903 return adjustTableCost(*Entry,
LT.first, ICA.
getFlags());
2971 case Intrinsic::fshl:
2976 case Intrinsic::fshr:
2987 MVT MTy =
LT.second;
2992 return LT.first * Entry->Cost;
2996 return LT.first * Entry->Cost;
3000 return LT.first * Entry->Cost;
3003 return LT.first * Entry->Cost;
3019 int RegisterFileMoveCost = 0;
3021 if (
Index != -1U && (Opcode == Instruction::ExtractElement ||
3022 Opcode == Instruction::InsertElement)) {
3027 if (!
LT.second.isVector())
3031 unsigned NumElts =
LT.second.getVectorNumElements();
3032 unsigned SubNumElts = NumElts;
3037 if (
LT.second.getSizeInBits() > 128) {
3038 assert((
LT.second.getSizeInBits() % 128) == 0 &&
"Illegal vector");
3039 unsigned NumSubVecs =
LT.second.getSizeInBits() / 128;
3040 SubNumElts = NumElts / NumSubVecs;
3041 if (SubNumElts <=
Index) {
3042 RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1);
3043 Index %= SubNumElts;
3052 return RegisterFileMoveCost;
3055 if (ScalarType->
isIntegerTy() && Opcode == Instruction::ExtractElement)
3056 return 1 + RegisterFileMoveCost;
3060 assert(ISD &&
"Unexpected vector opcode");
3061 MVT MScalarTy =
LT.second.getScalarType();
3064 return Entry->Cost + RegisterFileMoveCost;
3069 return 1 + RegisterFileMoveCost;
3073 Opcode == Instruction::InsertElement)
3074 return 1 + RegisterFileMoveCost;
3082 int ShuffleCost = 1;
3083 if (Opcode == Instruction::InsertElement) {
3084 auto *SubTy = cast<VectorType>(Val);
3091 return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
3096 if (Opcode == Instruction::ExtractElement && ScalarType->
isPointerTy())
3097 RegisterFileMoveCost += 1;
3103 const APInt &DemandedElts,
3104 bool Insert,
bool Extract) {
3111 MVT MScalarTy =
LT.second.getScalarType();
3118 if (
LT.second.getSizeInBits() <= 128) {
3134 unsigned Num128Lanes =
LT.second.getSizeInBits() / 128 *
LT.first;
3135 unsigned NumElts =
LT.second.getVectorNumElements() *
LT.first;
3137 unsigned Scale = NumElts / Num128Lanes;
3140 for (
unsigned I = 0;
I < NumElts;
I += Scale) {
3142 APInt MaskedDE =
Mask & WidenedDemandedElts;
3144 Cost += (Population > 0 && Population != Scale &&
3145 I %
LT.second.getVectorNumElements() != 0);
3146 Cost += Population > 0;
3154 for (
unsigned i = 0,
e = cast<FixedVectorType>(Ty)->getNumElements();
3156 if (DemandedElts[
i])
3159 }
else if (
LT.second.isVector()) {
3170 unsigned NumElts =
LT.second.getVectorNumElements();
3172 PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements());
3173 Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) *
LT.first;
3191 if (
auto *
SI = dyn_cast_or_null<StoreInst>(
I)) {
3194 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(
SI->getPointerOperand())) {
3195 if (!
all_of(
GEP->indices(), [](
Value *V) { return isa<Constant>(V); }))
3203 if (
auto *VTy = dyn_cast<FixedVectorType>(Src)) {
3204 unsigned NumElem = VTy->getNumElements();
3208 if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
3213 if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
3225 return NumElem * Cost + SplitCost;
3240 int Cost =
LT.first * 1;
3256 auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
3261 unsigned NumElem = SrcVTy->getNumElements();
3275 int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
3276 int ValueSplitCost =
3281 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
3288 if (VT.isSimple() &&
LT.second != VT.getSimpleVT() &&
3289 LT.second.getVectorNumElements() == NumElem)
3294 else if (
LT.second.getVectorNumElements() > NumElem) {
3296 LT.second.getVectorNumElements());
3303 return Cost +
LT.first * (IsLoad ? 2 : 8);
3306 return Cost +
LT.first;
3315 const unsigned NumVectorInstToHideOverhead = 10;
3326 return NumVectorInstToHideOverhead;
3376 assert(ISD &&
"Invalid opcode");
3385 if (
const auto *Entry =
CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
3389 if (
const auto *Entry =
CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
3393 if (
const auto *Entry =
CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
3399 MVT MTy =
LT.second;
3401 auto *ValVTy = cast<FixedVectorType>(ValTy);
3403 unsigned ArithmeticCost = 0;
3410 ArithmeticCost *=
LT.first - 1;
3414 if (
const auto *Entry =
CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
3415 return ArithmeticCost + Entry->Cost;
3418 if (
const auto *Entry =
CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
3419 return ArithmeticCost + Entry->Cost;
3422 if (
const auto *Entry =
CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
3423 return ArithmeticCost + Entry->Cost;
3472 if (ValVTy->getElementType()->isIntegerTy(1)) {
3473 unsigned ArithmeticCost = 0;
3480 ArithmeticCost *=
LT.first - 1;
3484 if (
const auto *Entry =
CostTableLookup(AVX512BoolReduction, ISD, MTy))
3485 return ArithmeticCost + Entry->Cost;
3488 return ArithmeticCost + Entry->Cost;
3491 return ArithmeticCost + Entry->Cost;
3494 return ArithmeticCost + Entry->Cost;
3500 unsigned NumVecElts = ValVTy->getNumElements();
3501 unsigned ScalarSize = ValVTy->getScalarSize