26 #define DEBUG_TYPE "x86tti"
36 assert(
isPowerOf2_32(TyWidth) &&
"Ty width must be power of 2");
58 if (ST->
hasAVX())
return 256;
95 assert(ISD &&
"Invalid opcode");
119 AVX2UniformConstCostTable[] = {
132 return LT.first * AVX2UniformConstCostTable[Idx].
Cost;
182 return LT.first * AVX512CostTable[Idx].
Cost;
195 return LT.first * AVX2CostTable[Idx].
Cost;
199 SSE2UniformConstCostTable[] = {
228 return LT.first * 15;
232 return LT.first * SSE2UniformConstCostTable[Idx].
Cost;
294 return LT.first * SSE2CostTable[Idx].
Cost;
327 return LT.first * AVX1CostTable[Idx].
Cost;
339 return LT.first * CustomLowered[Idx].
Cost;
360 if (LT.second.getSizeInBits() > 128)
364 return Cost * LT.first;
396 return LT.first * AVXAltShuffleTbl[Idx].Cost;
417 if (
ST->hasSSE41()) {
420 return LT.first * SSE41AltShuffleTbl[Idx].Cost;
436 if (
ST->hasSSSE3()) {
439 return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
459 return LT.first * SSEAltShuffleTbl[Idx].Cost;
460 return BaseT::getShuffleCost(
Kind, Tp, Index, SubTp);
463 return BaseT::getShuffleCost(
Kind, Tp, Index, SubTp);
466 unsigned X86TTIImpl::getCastInstrCost(
unsigned Opcode,
Type *Dst,
Type *Src) {
468 assert(ISD &&
"Invalid opcode");
501 return LTSrc.first * SSE2ConvTbl[Idx].
Cost;
505 AVX512ConversionTbl[] = {
541 return AVX512ConversionTbl[Idx].
Cost;
551 AVX2ConversionTbl[] = {
583 AVXConversionTbl[] = {
657 return AVX2ConversionTbl[Idx].
Cost;
664 return AVXConversionTbl[Idx].
Cost;
678 assert(ISD &&
"Invalid opcode");
716 return LT.first * AVX512CostTbl[Idx].
Cost;
722 return LT.first * AVX2CostTbl[Idx].
Cost;
728 return LT.first * AVX1CostTbl[Idx].
Cost;
734 return LT.first * SSE42CostTbl[Idx].
Cost;
742 assert(Val->
isVectorTy() &&
"This must be a vector type");
749 if (!LT.second.isVector())
753 unsigned Width = LT.second.getVectorNumElements();
754 Index = Index % Width;
764 unsigned X86TTIImpl::getScalarizationOverhead(
Type *Ty,
bool Insert,
766 assert (Ty->
isVectorTy() &&
"Can only scalarize vectors");
783 if (
VectorType *VTy = dyn_cast<VectorType>(Src)) {
784 unsigned NumElem = VTy->getVectorNumElements();
788 if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
793 if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
801 unsigned SplitCost = getScalarizationOverhead(Src,
804 return NumElem * Cost + SplitCost;
814 unsigned Cost = LT.first * 1;
818 if (LT.second.getSizeInBits() > 128 && !ST->
hasAVX2())
839 unsigned MaskSplitCost = getScalarizationOverhead(MaskTy,
false,
true);
840 unsigned ScalarCompareCost =
844 unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
846 unsigned ValueSplitCost =
852 return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
864 else if (LT.second.getVectorNumElements() > NumElem) {
871 return Cost + LT.first*4;
874 return Cost+LT.first;
882 unsigned NumVectorInstToHideOverhead = 10;
885 return NumVectorInstToHideOverhead;
898 assert(ISD &&
"Invalid opcode");
945 return LT.first * AVX1CostTblPairWise[Idx].
Cost;
951 return LT.first * SSE42CostTblPairWise[Idx].
Cost;
957 return LT.first * AVX1CostTblNoPairWise[Idx].
Cost;
963 return LT.first * SSE42CostTblNoPairWise[Idx].
Cost;
1003 ImmVal = Imm.
sext((BitSize + 63) & ~0x3fU);
1008 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
1014 return std::max(1U, Cost);
1027 unsigned ImmIdx = ~0U;
1031 case Instruction::GetElementPtr:
1041 case Instruction::Add:
1042 case Instruction::Sub:
1043 case Instruction::Mul:
1044 case Instruction::UDiv:
1045 case Instruction::SDiv:
1046 case Instruction::URem:
1047 case Instruction::SRem:
1051 case Instruction::ICmp:
1055 case Instruction::Shl:
1056 case Instruction::LShr:
1057 case Instruction::AShr:
1061 case Instruction::Trunc:
1062 case Instruction::ZExt:
1063 case Instruction::SExt:
1064 case Instruction::IntToPtr:
1065 case Instruction::PtrToInt:
1066 case Instruction::BitCast:
1075 if (Idx == ImmIdx) {
1076 unsigned NumConstants = (BitSize + 63) / 64;
1099 case Intrinsic::sadd_with_overflow:
1100 case Intrinsic::uadd_with_overflow:
1101 case Intrinsic::ssub_with_overflow:
1102 case Intrinsic::usub_with_overflow:
1103 case Intrinsic::smul_with_overflow:
1104 case Intrinsic::umul_with_overflow:
1108 case Intrinsic::experimental_stackmap:
1112 case Intrinsic::experimental_patchpoint_void:
1113 case Intrinsic::experimental_patchpoint_i64:
1125 if ((DataWidth < 32) || (Consecutive == 0))
1149 return (CallerBits & CalleeBits) == CalleeBits;
bool isInt< 32 >(int64_t x)
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
APInt LLVM_ATTRIBUTE_UNUSED_RESULT ashr(unsigned shiftAmt) const
Arithmetic right-shift function.
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy)
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
Cost tables and simple lookup functions.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
const TargetMachine & getTargetMachine() const
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
int CostTableLookup(const CostTblEntry< TypeTy > *Tbl, unsigned len, int ISD, CompareTy Ty)
Find in cost table, TypeTy must be comparable to CompareTy by ==.
Type Conversion Cost Table.
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
unsigned getNumberOfRegisters(bool Vector)
unsigned getMaxInterleaveFactor(unsigned VF)
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src)
unsigned getIntImmCost(int64_t)
Calculate the cost of materializing a 64-bit value.
Shift and rotation operations.
static ConstantInt * ExtractElement(Constant *V, Constant *Idx)
unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm)
Type * getVectorElementType() const
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
bool is64Bit() const
Is this x86_64? (disregarding specific ABI / programming model)
Simple integer binary arithmetic operators.
bool isFloatingPointTy() const
isFloatingPointTy - Return true if this is one of the six floating point types
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy)
unsigned getVectorNumElements() const
MVT - Machine Value Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Simple binary floating point operators.
bool isVectorTy() const
isVectorTy - True if this is an instance of VectorType.
int64_t getSExtValue() const
Get sign extended value.
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
APInt LLVM_ATTRIBUTE_UNUSED_RESULT sext(unsigned width) const
Sign extend to a new width.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isLegalMaskedStore(Type *DataType, int Consecutive)
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
EVT - Extended Value Type.
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
unsigned getCFInstrCost(unsigned Opcode)
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
unsigned getVectorNumElements() const
const FeatureBitset & getFeatureBits() const
getFeatureBits - Return the feature bits.
bool hasCompatibleFunctionAttributes(const Function *Caller, const Function *Callee) const
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None)
VectorType - Class to represent vector types.
Class for arbitrary precision integers.
unsigned getRegisterBitWidth(bool Vector)
bool isIntegerTy() const
isIntegerTy - True if this is an instance of IntegerType.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
const Type * getScalarType() const LLVM_READONLY
getScalarType - If this is a vector type, return the element type, otherwise return 'this'...
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None)
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
unsigned getAddressComputationCost(Type *Ty, bool IsComplex)
const ARM::ArchExtKind Kind
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
int ConvertCostTableLookup(const TypeConversionCostTblEntry< TypeTy > *Tbl, unsigned len, int ISD, CompareTy Dst, CompareTy Src)
Find in type conversion cost table, TypeTy must be comparable to CompareTy by ==. ...
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
getPrimitiveSizeInBits - Return the basic size of this type if it is a primitive type.
APInt LLVM_ATTRIBUTE_UNUSED_RESULT sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static VectorType * get(Type *ElementType, unsigned NumElements)
VectorType::get - This static method is the primary way to construct an VectorType.
Primary interface to the complete machine description for the target machine.
bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
SetCC operator - This evaluates to a true value iff the condition is true.
TRUNCATE - Completely drop the high bits.
unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex)
std::pair< unsigned, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
static IntegerType * getInt8Ty(LLVMContext &C)
LLVMContext & getGlobalContext()
getGlobalContext - Returns a global context.
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
This file describes how to lower LLVM code to machine code.
bool isLegalMaskedLoad(Type *DataType, int Consecutive)