52 #define DEBUG_TYPE "x86tti"
117 unsigned Opcode,
Type *Ty,
126 assert(ISD &&
"Invalid opcode");
150 bool Op1Signed =
false;
152 bool Op2Signed =
false;
155 bool signedMode = Op1Signed | Op2Signed;
156 unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
160 if (!signedMode && OpMinSize <= 8)
164 if (!signedMode && OpMinSize <= 16)
169 return LT.first * Entry->Cost;
193 static const CostTblEntry AVX512BWUniformConstCostTable[] = {
204 if (
const auto *Entry =
CostTableLookup(AVX512BWUniformConstCostTable, ISD,
206 return LT.first * Entry->Cost;
209 static const CostTblEntry AVX512UniformConstCostTable[] = {
216 if (
const auto *Entry =
CostTableLookup(AVX512UniformConstCostTable, ISD,
218 return LT.first * Entry->Cost;
221 static const CostTblEntry AVX2UniformConstCostTable[] = {
236 if (
const auto *Entry =
CostTableLookup(AVX2UniformConstCostTable, ISD,
238 return LT.first * Entry->Cost;
241 static const CostTblEntry SSE2UniformConstCostTable[] = {
264 return LT.first * 30;
266 return LT.first * 15;
270 return LT.first * Entry->Cost;
283 if (
const auto *Entry =
285 return LT.first * Entry->Cost;
305 if (
const auto *Entry =
307 return LT.first * Entry->Cost;
318 if (
const auto *Entry =
CostTableLookup(AVX512DQCostTable, ISD, LT.second))
319 return LT.first * Entry->Cost;
344 return LT.first * Entry->Cost;
368 return LT.first * Entry->Cost;
387 if (ISD ==
ISD::SHL &&
LT.second == MVT::v16i16 &&
388 (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
389 Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
395 return LT.first * Entry->Cost;
430 return LT.first * Entry->Cost;
432 static const CostTblEntry SSE2UniformShiftCostTable[] = {
449 ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
450 (Op2Info == TargetTransformInfo::OK_UniformValue))) {
451 if (
const auto *Entry =
453 return LT.first * Entry->Cost;
457 Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
461 if (((VT == MVT::v8i16 || VT == MVT::v4i32) &&
ST->hasSSE2()) ||
462 ((VT == MVT::v16i16 || VT == MVT::v8i32) &&
ST->hasAVX()))
504 return LT.first * Entry->Cost;
550 return LT.first * Entry->Cost;
561 return LT.first * Entry->Cost;
590 return LT.first * Entry->Cost;
642 return LT.first * Entry->Cost;
651 return LT.first * Entry->Cost;
654 return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
673 MVT LegalVT = LT.second;
681 unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
683 unsigned NumOfDests = LT.first;
688 unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
689 return NumOfShuffles *
699 int NumOfDests = LT.first;
700 int NumOfShufflesPerDest = LT.first * 2 - 1;
701 LT.first = NumOfDests * NumOfShufflesPerDest;
717 if (
const auto *Entry =
719 return LT.first * Entry->Cost;
744 if (
const auto *Entry =
746 return LT.first * Entry->Cost;
789 return LT.first * Entry->Cost;
812 return LT.first * Entry->Cost;
841 return LT.first * Entry->Cost;
854 return LT.first * Entry->Cost;
869 return LT.first * Entry->Cost;
894 return LT.first * Entry->Cost;
904 return LT.first * Entry->Cost;
906 return BaseT::getShuffleCost(
Kind, Tp, Index, SubTp);
911 assert(ISD &&
"Invalid opcode");
1221 LTDest.second, LTSrc.second))
1222 return LTSrc.first * Entry->Cost;
1279 MVT MTy = LT.second;
1282 assert(ISD &&
"Invalid opcode");
1323 return LT.first * Entry->Cost;
1327 return LT.first * Entry->Cost;
1331 return LT.first * Entry->Cost;
1335 return LT.first * Entry->Cost;
1339 return LT.first * Entry->Cost;
1473 case Intrinsic::bitreverse:
1476 case Intrinsic::bswap:
1479 case Intrinsic::ctlz:
1482 case Intrinsic::ctpop:
1485 case Intrinsic::cttz:
1488 case Intrinsic::sqrt:
1495 MVT MTy = LT.second;
1500 return LT.first * Entry->Cost;
1504 return LT.first * Entry->Cost;
1508 return LT.first * Entry->Cost;
1512 return LT.first * Entry->Cost;
1516 return LT.first * Entry->Cost;
1520 return LT.first * Entry->Cost;
1524 return LT.first * Entry->Cost;
1544 if (!LT.second.isVector())
1548 unsigned Width = LT.second.getVectorNumElements();
1549 Index = Index % Width;
1552 if (ScalarType->isFloatingPointTy() && Index == 0)
1558 int RegisterFileMoveCost = 0;
1559 if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
1560 RegisterFileMoveCost = 1;
1565 int X86TTIImpl::getScalarizationOverhead(
Type *Ty,
bool Insert,
bool Extract) {
1582 if (
VectorType *VTy = dyn_cast<VectorType>(Src)) {
1583 unsigned NumElem = VTy->getVectorNumElements();
1587 if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
1592 if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
1602 return NumElem * Cost + SplitCost;
1612 int Cost = LT.first * 1;
1637 int MaskSplitCost = getScalarizationOverhead(MaskTy,
false,
true);
1641 int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
1643 int ValueSplitCost = getScalarizationOverhead(
1648 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
1655 if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
1661 else if (LT.second.getVectorNumElements() > NumElem) {
1668 return Cost + LT.first*4;
1671 return Cost+LT.first;
1680 unsigned NumVectorInstToHideOverhead = 10;
1691 return NumVectorInstToHideOverhead;
1704 MVT MTy = LT.second;
1707 assert(ISD &&
"Invalid opcode");
1752 if (
const auto *Entry =
CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
1753 return LT.first * Entry->Cost;
1756 if (
const auto *Entry =
CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
1757 return LT.first * Entry->Cost;
1760 if (
const auto *Entry =
CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
1761 return LT.first * Entry->Cost;
1764 if (
const auto *Entry =
CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
1765 return LT.first * Entry->Cost;
1804 ImmVal = Imm.
sext((BitSize + 63) & ~0x3fU);
1809 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
1815 return std::max(1, Cost);
1828 unsigned ImmIdx = ~0U;
1832 case Instruction::GetElementPtr:
1842 case Instruction::ICmp:
1850 if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff)
1863 case Instruction::Sub:
1864 case Instruction::Mul:
1865 case Instruction::UDiv:
1866 case Instruction::SDiv:
1867 case Instruction::URem:
1868 case Instruction::SRem:
1874 case Instruction::Shl:
1875 case Instruction::LShr:
1876 case Instruction::AShr:
1880 case Instruction::Trunc:
1881 case Instruction::ZExt:
1882 case Instruction::SExt:
1883 case Instruction::IntToPtr:
1884 case Instruction::PtrToInt:
1885 case Instruction::BitCast:
1886 case Instruction::PHI:
1894 if (Idx == ImmIdx) {
1895 int NumConstants = (BitSize + 63) / 64;
1918 case Intrinsic::sadd_with_overflow:
1919 case Intrinsic::uadd_with_overflow:
1920 case Intrinsic::ssub_with_overflow:
1921 case Intrinsic::usub_with_overflow:
1922 case Intrinsic::smul_with_overflow:
1923 case Intrinsic::umul_with_overflow:
1927 case Intrinsic::experimental_stackmap:
1931 case Intrinsic::experimental_patchpoint_void:
1932 case Intrinsic::experimental_patchpoint_i64:
1941 int X86TTIImpl::getGSVectorCost(
unsigned Opcode,
Type *SrcVTy,
Value *
Ptr,
1944 assert(isa<VectorType>(SrcVTy) &&
"Unexpected type in getGSVectorCost");
1955 if (IndexSize < 64 || !GEP)
1958 unsigned NumOfVarIndices = 0;
1970 ++NumOfVarIndices > 1)
1973 return (
unsigned)32;
1979 unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr,
DL) :
1986 int SplitFactor = std::max(IdxsLT.first, SrcLT.first);
1987 if (SplitFactor > 1) {
1990 return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment,
1996 const int GSOverhead = 2;
2009 int X86TTIImpl::getGSScalarCost(
unsigned Opcode,
Type *SrcVTy,
2010 bool VariableMask,
unsigned Alignment,
2011 unsigned AddressSpace) {
2014 int MaskUnpackCost = 0;
2018 MaskUnpackCost = getScalarizationOverhead(MaskTy,
false,
true);
2019 int ScalarCompareCost =
2023 MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
2030 int InsertExtractCost = 0;
2032 for (
unsigned i = 0;
i < VF; ++
i)
2034 InsertExtractCost +=
2037 for (
unsigned i = 0;
i < VF; ++
i)
2039 InsertExtractCost +=
2042 return MemoryOpCost + MaskUnpackCost + InsertExtractCost;
2047 Value *Ptr,
bool VariableMask,
2048 unsigned Alignment) {
2054 assert(PtrTy &&
"Unexpected type for Ptr argument");
2057 bool Scalarize =
false;
2067 if (VF == 2 || (VF == 4 && !ST->
hasVLX()))
2071 return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment,
2074 return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
2079 int DataWidth = isa<PointerType>(ScalarTy) ?
2082 return ((DataWidth == 32 || DataWidth == 64) && ST->
hasAVX()) ||
2083 ((DataWidth == 8 || DataWidth == 16) && ST->
hasBWI());
2104 int DataWidth = isa<PointerType>(ScalarTy) ?
2108 return (DataWidth == 32 || DataWidth == 64) && ST->
hasAVX512();
2128 return (CallerBits & CalleeBits) == CalleeBits;
2146 unsigned AddressSpace) {
2157 unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
2162 unsigned MemOpCost =
2172 unsigned ShuffleCost =
2175 unsigned NumOfLoadsInInterleaveGrp =
2176 Indices.
size() ? Indices.
size() : Factor;
2179 unsigned NumOfResults =
2181 NumOfLoadsInInterleaveGrp;
2185 unsigned NumOfUnfoldedLoads =
2186 NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
2189 unsigned NumOfShufflesPerResult =
2190 std::max((
unsigned)1, (
unsigned)(NumOfMemOps - 1));
2195 unsigned NumOfMoves = 0;
2197 NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
2199 int Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
2200 NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
2207 "Expected Store Instruction at this point");
2211 unsigned NumOfSources = Factor;
2212 unsigned ShuffleCost =
2214 unsigned NumOfShufflesPerStore = NumOfSources - 1;
2218 unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
2219 int Cost = NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
2228 unsigned AddressSpace) {
2229 auto isSupportedOnAVX512 = [](
Type *VecTy,
bool &RequiresBW) {
2242 bool HasAVX512Solution = isSupportedOnAVX512(VecTy, RequiresBW);
2243 if (ST->
hasAVX512() && HasAVX512Solution && (!RequiresBW || ST->
hasBWI()))
2245 Alignment, AddressSpace);
2247 Alignment, AddressSpace);
constexpr bool isUInt< 32 >(uint64_t x)
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
APInt ashr(unsigned shiftAmt) const
Arithmetic right-shift function.
A parsed version of the target data layout string in and methods for querying it. ...
Value * getPointerOperand()
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
uint64_t getZExtValue() const
Get zero extended value.
DELETED_NODE - This is an illegal value that is used to catch errors.
Cost tables and simple lookup functions.
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
Calculate the cost of Gather / Scatter operation.
int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type * > Tys, FastMathFlags FMF)
const TargetMachine & getTargetMachine() const
const Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
unsigned getNumOperands() const
The main scalar evolution driver.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
unsigned getSizeInBits() const
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
bool isLegalMaskedScatter(Type *DataType)
Type Conversion Cost Table.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
unsigned getNumberOfRegisters(bool Vector)
unsigned getMaxInterleaveFactor(unsigned VF)
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src)
bool isLegalMaskedStore(Type *DataType)
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
Shift and rotation operations.
const TypeConversionCostTblEntry * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntry > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table, TypeTy must be comparable to CompareTy by ==. ...
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Type * getVectorElementType() const
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm)
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Simple integer binary arithmetic operators.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr)
Type * getScalarType() const LLVM_READONLY
If this is a vector type, return the element type, otherwise return 'this'.
size_t size() const
size - Get the array size.
Class to represent pointers.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
unsigned getVectorNumElements() const
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
MVT - Machine Value Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Simple binary floating point operators.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isLegalMaskedGather(Type *DataType)
int64_t getSExtValue() const
Get sign extended value.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
APInt sext(unsigned width) const
Sign extend to a new width.
bool isLegalMaskedLoad(Type *DataType)
unsigned getBitWidth() const
Return the number of bits in the APInt.
Value * getOperand(unsigned i) const
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
EVT - Extended Value Type.
bool isPointerTy() const
True if this is an instance of PointerType.
bool enableInterleavedAccessVectorization()
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
unsigned getCFInstrCost(unsigned Opcode)
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
constexpr bool isInt< 32 >(int64_t x)
Type * getType() const
All values are typed, get the type of this value.
bool isUnalignedMem32Slow() const
Byte Swap and Counting operators.
const FeatureBitset & getFeatureBits() const
getFeatureBits - Return the feature bits.
Class to represent vector types.
Class for arbitrary precision integers.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src)
unsigned getRegisterBitWidth(bool Vector)
bool isIntegerTy() const
True if this is an instance of IntegerType.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
unsigned getVectorNumElements() const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
const CostTblEntry * CostTableLookup(ArrayRef< CostTblEntry > Tbl, int ISD, MVT Ty)
Find in cost table, TypeTy must be comparable to CompareTy by ==.
This class represents an analyzed expression in the program.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Primary interface to the complete machine description for the target machine.
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy)
Convenience struct for specifying and reasoning about fast-math flags.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value * > Args, FastMathFlags FMF)
Get intrinsic cost based on arguments.
SetCC operator - This evaluates to a true value iff the condition is true.
MVT getVectorElementType() const
TRUNCATE - Completely drop the high bits.
static IntegerType * getInt8Ty(LLVMContext &C)
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
int getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
This file describes how to lower LLVM code to machine code.