// Annotated excerpts from the RISC-V TargetTransformInfo implementation
// (llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp). Elided regions are
// marked with "// ...".
#define DEBUG_TYPE "riscvtti"
22 "riscv-v-register-bit-width-lmul",
24 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
25 "by autovectorized code. Fractional LMULs are not supported."),
31 "Result used for getMaximumVF query which is used exclusively by "
32 "SLP vectorizer. Defaults to 1 which disables SLP."),
// In RISCVTTIImpl::getLMULCost: cost scales with the LMUL of the type.
  std::tie(LMul, Fractional) =
      RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
  // ...
  return std::max<unsigned>(Cost, 1);
59 "getIntImmCost can only estimate cost of materialising integers");
68 getST()->getFeatureBits());
// In a static helper (canUseShiftPair upstream): an AND whose other operand
// is a one-use shl by a constant may fold into a slli+srli pair, making the
// AND immediate free.
  auto *BO = dyn_cast<BinaryOperator>(Inst->getOperand(0));
  if (!BO || !BO->hasOneUse())
    return false;
  if (BO->getOpcode() != Instruction::Shl)
    return false;
  if (!isa<ConstantInt>(BO->getOperand(1)))
    return false;
  unsigned ShAmt = cast<ConstantInt>(BO->getOperand(1))->getZExtValue();
  // ... (Trailing = trailing-zero count of the shifted mask)
  if (ShAmt == Trailing)
    return true;
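// The check above recognizes (and (shl X, C2), C1) where C1 is a shifted
// mask whose trailing-zero count equals C2; that shape lowers to an
// slli+srli pair with no materialized constant. A standalone sketch of the
// bit test (maskFoldsIntoShiftPair is a hypothetical name, for illustration
// only):
static bool maskFoldsIntoShiftPair(uint64_t Mask, unsigned ShAmt) {
  if (Mask == 0)
    return false;
  unsigned Trailing = __builtin_ctzll(Mask);     // countr_zero(Mask)
  uint64_t Ones = Mask >> Trailing;              // strip the trailing zeros
  bool IsShiftedMask = ((Ones + 1) & Ones) == 0; // remaining ones contiguous?
  return IsShiftedMask && ShAmt == Trailing;     // e.g. Mask=0xFF0, ShAmt=4
}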
103 "getIntImmCost can only estimate cost of materialising integers");
111 bool Takes12BitImm =
false;
112 unsigned ImmArgIdx = ~0U;
  switch (Opcode) {
  case Instruction::GetElementPtr:
    // Never hoist constants out of a GetElementPtr; offset splitting is
    // handled elsewhere, so the immediate is free here.
    return TTI::TCC_Free;
  case Instruction::And:
    // zext.h
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
      return TTI::TCC_Free;
    // zext.w
    if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
      return TTI::TCC_Free;
    // bclri
    if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
      return TTI::TCC_Free;
    if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
        canUseShiftPair(Inst, Imm))
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Add:
    Takes12BitImm = true;
    break;
  case Instruction::Or:
  case Instruction::Xor:
    // bseti/binvi
    if (ST->hasStdExtZbs() && Imm.isPowerOf2())
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Mul:
    // A negated power of two is a shift plus a negate.
    if (Imm.isNegatedPowerOf2())
      return TTI::TCC_Free;
    Takes12BitImm = true;
    break;
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;
    break;
  default:
    break;
  }
    // The immediate is free if it fits addi's 12-bit signed range.
    if (Imm.getMinSignedBits() <= 64 &&
        getTLI()->isLegalAddImmediate(Imm.getSExtValue()))
      return TTI::TCC_Free;
// In RISCVTTIImpl::shouldExpandReduction:
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:
    return true;
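  // Rationale: RVV has no vector multiply-reduction instruction, and an
  // ordered fmul reduction cannot be evaluated as a tree, so both intrinsics
  // are left to the ExpandReductions pass.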
// In RISCVTTIImpl::getSpliceCost: a splice is a vslidedown+vslideup pair,
// scaled by the number of legalized registers and their LMUL.
  return Cost * LT.first * getLMULCost(LT.second);
// In RISCVTTIImpl::getShuffleCost:
  if (isa<ScalableVectorType>(Tp)) {
    // ... (scalable-vector shuffle kinds are costed directly)
  }
  // ... (fixed-length broadcast handling; tail of a multi-line cost call:)
                                 Instruction::InsertElement);
  if (LT.second.getScalarSizeInBits() == 1) {
    // ... (broadcast a scalar into an i1 vector: andi + vmv.v.x + vmsne.vi)
    return LT.first * getLMULCost(LT.second) * 3;
    // ... (broadcast from an i1 vector element: a six-instruction sequence)
    return LT.first * getLMULCost(LT.second) * 6;
  }
  // ... (non-mask broadcast of a scalar: one vmv.v.x per register group)
  return LT.first * getLMULCost(LT.second);
  // ... (broadcast of a vector element: vrgather.vi)
  return LT.first * getLMULCost(LT.second);
InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  // ...
  if ((Opcode == Instruction::Load &&
       !isLegalMaskedGather(DataTy, Align(Alignment))) ||
      (Opcode == Instruction::Store &&
       !isLegalMaskedScatter(DataTy, Align(Alignment))))
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);
  // Model the cost as one scalar memory op per (estimated) lane.
  auto &VTy = *cast<VectorType>(DataTy);
  InstructionCost MemOpCost =
      getMemoryOpCost(Opcode, VTy.getElementType(), Alignment, 0, CostKind,
                      {TTI::OK_AnyValue, TTI::OP_None}, I);
  unsigned NumLoads = getEstimatedVLFor(&VTy);
  return NumLoads * MemOpCost;
}
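// Illustrative reading of the model above: a gather of a fixed <4 x i32> is
// charged as four independent scalar i32 loads, one per lane, on the theory
// that each lane becomes its own memory access.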
    {Intrinsic::vp_bitreverse, MVT::v2i8, 17},
    {Intrinsic::vp_bitreverse, MVT::v4i8, 17},
    {Intrinsic::vp_bitreverse, MVT::v8i8, 17},
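    // (No single-instruction bitreverse exists for i8 elements here; the
    // constant 17 presumably approximates the expanded shift-and-mask
    // sequence.)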
// Static helper mapping a VP intrinsic ID to its ISD opcode via the
// VPIntrinsics.def X-macro; unknown IDs fall through to ISD::DELETED_NODE.
  switch (ID) {
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD)                                    \
  case Intrinsic::VPID:                                                        \
    return ISD::VPSD;
#include "llvm/IR/VPIntrinsics.def"
#undef HELPER_MAP_VPID_TO_VPSD
  default:
    return ISD::DELETED_NODE;
  }
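// The #define/#include/#undef trio above is an X-macro: every
// HELPER_MAP_VPID_TO_VPSD entry in VPIntrinsics.def expands into one `case`
// of this switch. A self-contained sketch of the same technique (the
// FRUIT_LIST/FRUIT names are illustrative, not from LLVM):
//
//   #define FRUIT_LIST(X) X(Apple, 1) X(Pear, 2) X(Plum, 3)
//   enum Fruit {
//   #define FRUIT(Name, Id) Fruit_##Name = Id,
//     FRUIT_LIST(FRUIT)
//   #undef FRUIT
//   };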
// In RISCVTTIImpl::getIntrinsicInstrCost:
  switch (ICA.getID()) {
  case Intrinsic::ceil:
  case Intrinsic::floor:
  case Intrinsic::trunc:
  case Intrinsic::rint:
  case Intrinsic::round:
  case Intrinsic::roundeven: {
    // ... (these all share one lowering-cost estimate)
  }
  case Intrinsic::umin:
  case Intrinsic::umax:
  case Intrinsic::smin:
  case Intrinsic::smax: {
    auto LT = getTypeLegalizationCost(RetTy);
    if ((ST->hasVInstructions() && LT.second.isVector()) ||
        (LT.second.isScalarInteger() && ST->hasStdExtZbb()))
      return LT.first;
    break;
  }
  case Intrinsic::sadd_sat:
  case Intrinsic::ssub_sat:
  case Intrinsic::uadd_sat:
  case Intrinsic::usub_sat: {
    // ...
  }
  case Intrinsic::abs: {
    // ...
  }
  case Intrinsic::fabs:
  case Intrinsic::sqrt: {
    // ...
  }
  case Intrinsic::experimental_stepvector: {
    unsigned Cost = 1; // vid.v generates the step sequence
    auto LT = getTypeLegalizationCost(RetTy);
    return Cost + (LT.first - 1);
  }
  case Intrinsic::vp_rint: {
    // RISC-V uses at least 5 instructions to lower rounding intrinsics.
    unsigned Cost = 5;
    auto LT = getTypeLegalizationCost(RetTy);
    if (TLI->isOperationCustom(ISD::VP_FRINT, LT.second))
      return Cost * LT.first;
    break;
  }
  case Intrinsic::vp_nearbyint: {
    // One extra read and one extra write of fflags compared to vp_rint.
    unsigned Cost = 7;
    auto LT = getTypeLegalizationCost(RetTy);
    // ...
    return Cost * LT.first;
  }
  case Intrinsic::vp_ceil:
  case Intrinsic::vp_floor:
  case Intrinsic::vp_round:
  case Intrinsic::vp_roundeven:
  case Intrinsic::vp_roundtozero: {
    // Rounding with a static rounding mode needs two more instructions
    // (swap/write FRM) than vp_rint.
    unsigned Cost = 7;
    auto LT = getTypeLegalizationCost(RetTy);
    unsigned VPISD = getISDForVPIntrinsicID(ICA.getID());
    if (TLI->isOperationCustom(VPISD, LT.second))
      return Cost * LT.first;
    break;
  }
  if (ST->hasVInstructions() && RetTy->isVectorTy()) {
    auto LT = getTypeLegalizationCost(RetTy);
    if (const auto *Entry = CostTableLookup(VectorIntrinsicCostTable,
                                            ICA.getID(), LT.second))
      return LT.first * Entry->Cost;
  }
// In RISCVTTIImpl::getCastInstrCost:
  if (isa<VectorType>(Dst) && isa<VectorType>(Src)) {
    // Element types wider than ELEN are not handled by the RVV cost model.
    if (Src->getScalarSizeInBits() > ST->getELEN() ||
        Dst->getScalarSizeInBits() > ST->getELEN())
      return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
    // ...
    assert(ISD && "Invalid opcode");
    int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
                  (int)Log2_32(Src->getScalarSizeInBits());
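    // PowDiff counts the halving/doubling steps between the element widths:
    // for example, i8 -> i32 gives |log2(32) - log2(8)| = 2, i.e. roughly
    // one widening (or narrowing) instruction per step.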
    // Casts involving i1 masks cannot use PowDiff:
    if (Src->getScalarSizeInBits() == 1) {
      // Extending an i1 mask uses vmv.v.i + vmerge.vim rather than vsext.
      return 2;
    }
    // ...
    if (Dst->getScalarSizeInBits() == 1) {
      // Truncating to an i1 mask is vand.vi + vmsne.vi.
      return 2;
    }
    // ... (int-int and fp-fp width changes count widen/narrow steps:)
    return std::abs(PowDiff);
    // For int<->fp conversions:
    if (Src->getScalarSizeInBits() == 1 || Dst->getScalarSizeInBits() == 1) {
      // A fixed mask<->fp sequence; PowDiff does not apply.
      return 3;
    }
    if (std::abs(PowDiff) <= 1)
      return 1;
    if (Src->isIntOrIntVectorTy())
      return 2; // a single v[sz]ext folds into the conversion
    return std::abs(PowDiff);
unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
  if (isa<ScalableVectorType>(Ty)) {
    const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
    const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
    const unsigned VectorBits = *getVScaleForTuning() * RISCV::RVVBitsPerBlock;
    return RISCVTargetLowering::computeVLMAX(VectorBits, EltSize, MinSize);
  }
  return cast<FixedVectorType>(Ty)->getNumElements();
}
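// Worked example for the scalable path above (a sketch; assumes
// getVScaleForTuning() == 2, so VectorBits = 2 * RVVBitsPerBlock = 128):
// for <vscale x 4 x i32>, EltSize = 32 and MinSize = 128, and computeVLMAX's
// VLMAX = (VectorBits / EltSize) * (MinSize / RVVBitsPerBlock) evaluates to
// 4 * 2 = 8 lanes. A fixed <8 x i32> simply yields 8.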
// In RISCVTTIImpl::getMinMaxReductionCost:
  // ... (i1 min/max reduce to short vcpop-based sequences:)
  return (LT.first - 1) + 3;
  // An IR reduction is two vmv plus one RVV reduction instruction; the
  // reduction tree contributes log2(VL) depth.
  InstructionCost BaseCost = 2;
  unsigned VL = getEstimatedVLFor(Ty);
  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
InstructionCost
RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                         std::optional<FastMathFlags> FMF,
                                         TTI::TargetCostKind CostKind) {
  // ...
  assert(ISD && "Invalid opcode");
  // ... (i1 reductions lower to short vcpop-based sequences:)
  return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);
  // An IR reduction is two vmv plus one RVV reduction instruction; an
  // ordered FP reduction instead chains one addition per lane.
  InstructionCost BaseCost = 2;
  unsigned VL = getEstimatedVLFor(Ty);
  if (TTI::requiresOrderedReduction(FMF))
    return (LT.first - 1) + BaseCost + VL;
InstructionCost RISCVTTIImpl::getExtendedReductionCost(
    unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
    std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
  // ...
  if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
    return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
                                           FMF, CostKind);
  // ...
  // The widening reduction folds the extend, so charge only the reduction.
  return (LT.first - 1) +
         getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
// In RISCVTTIImpl::getStoreImmCost:
  if (!isa<VectorType>(Ty))
    // Scalar immediate materialization is not modeled here.
    return 0;
// In RISCVTTIImpl::getMemoryOpCost: charge stored constants for their
// materialization.
  InstructionCost Cost = 0;
  if (Opcode == Instruction::Store && OpInfo.isConstant())
    Cost += getStoreImmCost(Src, OpInfo, CostKind);
// In RISCVTTIImpl::getCmpSelInstrCost:
  if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    if (CondTy->isVectorTy()) {
      if (ValTy->getScalarSizeInBits() == 1)
        // vmandn.mm + vmand.mm + vmor.mm
        return LT.first * 3;
      // Vector selects are supported natively (one vmerge).
      return LT.first * 1;
    }
    if (ValTy->getScalarSizeInBits() == 1)
      // Splat the scalar condition, compare it, then the three mask ops.
      return LT.first * 5;
    // Splat the scalar condition, compare it, then vmerge.
    return LT.first * 3;
  }
  if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
      ValTy->isVectorTy()) {
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
    // ... (integer compares: a single vmseq/vmslt-class instruction)
    return LT.first * 1;
    // ... (FP compares with supported predicates: a single vmflt-class op)
    return LT.first * 1;
  }
// In RISCVTTIImpl::getVectorInstrCost:
  if (Opcode != Instruction::ExtractElement &&
      Opcode != Instruction::InsertElement)
    return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  // ...
  // The type is legalized to a scalar type.
  if (!LT.second.isVector())
    return 0;
  // An unsupported scalable vector.
  if (LT.second.isScalableVector() && !LT.first.isValid())
    return LT.first;
  // Extract/insert go through lane 0 via vslidedown/vslideup plus a vmv;
  // an insert must also slide the tail back into place.
  unsigned BaseCost = 1;
  unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
  // The type may have been split; normalize a known index accordingly.
  if (LT.second.isFixedLengthVector()) {
    unsigned Width = LT.second.getVectorNumElements();
    Index = Index % Width;
  }
  // Lane 0 needs no slide at all.
  if (Index == 0)
    SlideCost = 0;
  else if (Opcode == Instruction::InsertElement)
    SlideCost = 1; // with a constant index, no addi is needed
  // ... (i1 mask elements need extra mask<->data moves:)
  BaseCost = Opcode == Instruction::InsertElement ? 5 : 3;
  // ... (i64 elements when XLEN is 32 need extra instructions:)
  BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
  return BaseCost + SlideCost;
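// Worked example of the model above (illustrative): extracting element 3 of
// a legal <8 x i32> is vslidedown.vi + vmv.x.s, i.e. BaseCost(1) +
// SlideCost(1) = 2, while extracting element 0 skips the slide and costs 1.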
// In RISCVTTIImpl::getArithmeticInstrCost:
  if (!LT.second.isVector())
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);
  // Cost of materializing a constant (splat) operand before the vector op.
  auto getConstantMatCost =
      [&](unsigned Operand, TTI::OperandValueInfo OpInfo) -> InstructionCost {
    // ...
  };
  InstructionCost ConstantMatCost = 0;
  if (Op1Info.isConstant())
    ConstantMatCost += getConstantMatCost(0, Op1Info);
  if (Op2Info.isConstant())
    ConstantMatCost += getConstantMatCost(1, Op2Info);
  // ... (supported opcodes cost one LMUL-scaled op per legalized part:)
  return ConstantMatCost + getLMULCost(LT.second) * LT.first * 1;
  // ... (everything else falls back to the base implementation:)
  return ConstantMatCost +
         BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                       Op2Info, Args, CxtI);
// In RISCVTTIImpl::getUnrollingPreferences: subtargets that opt into the
// default unrolling behavior use the base heuristics unchanged.
  if (ST->enableDefaultUnroll())
    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
  // ...
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");
  // Allow at most one exit besides the latch; this mirrors the runtime
  // unroller's profitability check.
  if (ExitingBlocks.size() > 2)
    return;
  for (auto &I : *BB) {
    // Don't unroll loops that already contain vector code; unrolling would
    // only duplicate the vector work.
    if (I.getType()->isVectorTy())
      return;
    if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
      // ... (calls generally inhibit unrolling unless trivially lowered)
    }
  }