#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

// ...

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  // ...

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *DstTy, unsigned Opcode, /* ... */);

  InstructionCost getVectorInstrCostHelper(/* ... */ unsigned Index,
                                           bool HasRealUse);

public:
  AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}

  // ...

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    // ...
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
      APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                     Type *CondTy, CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    // ...
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false;
    // ...
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE() || ST->forceStreamingCompatibleSVE())
      return false;
    // ...
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;
    // ...
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    // ... (ElementBits is ElementTy->getScalarSizeInBits())
    unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
    return VectorBits >= 64;
  }

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // Each half of the vector must fit in a register: power-of-two element
    // count > 1, element size between 8 and 128 bits.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    // ...
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // Only supported on little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    // ...
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
      bool UseMaskForGaps = false);

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader);

  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }

  TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
    // ...
    return IVUpdateMayOverflow
               ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
               : TailFoldingStyle::DataAndControlFlow;
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    // ...
  }

  // ...
};

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
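To make the non-temporal legality rule above concrete, here is a small standalone sketch. ntStoreLoadLegal and isPow2 are hypothetical helpers written for this example, not LLVM APIs; they mirror the isLegalNTStoreLoad predicate under the assumption that the reconstructed element-size bound (a power of two between 8 and 128 bits) is accurate.

#include <cstdint>
#include <cstdio>

// Power-of-two check, equivalent in effect to LLVM's isPowerOf2_64.
static bool isPow2(std::uint64_t V) { return V && !(V & (V - 1)); }

// Mirrors the fixed-vector case of isLegalNTStoreLoad: more than one
// element, a power-of-two element count, and a power-of-two element size
// of 8..128 bits.
static bool ntStoreLoadLegal(unsigned NumElements, unsigned EltSizeBits) {
  return NumElements > 1 && isPow2(NumElements) && EltSizeBits >= 8 &&
         EltSizeBits <= 128 && isPow2(EltSizeBits);
}

int main() {
  std::printf("<2 x i64>: %d\n", ntStoreLoadLegal(2, 64)); // 1: halves fit in registers
  std::printf("<3 x i32>: %d\n", ntStoreLoadLegal(3, 32)); // 0: non-power-of-two count
  std::printf("<1 x i64>: %d\n", ntStoreLoadLegal(1, 64)); // 0: single element
  return 0;
}

The intent is that a non-temporal vector access can be lowered directly to an LDNP/STNP pair when the vector splits into two register-sized halves. The declarations that follow are the public AArch64TTIImpl members indexed by this page.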
InstructionCost getSpliceCost(VectorType *Tp, int Index)
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, FastMathFlags FMF, TTI::TargetCostKind CostKind)
bool supportsScalableVectors() const
unsigned getMinTripCountTailFoldingThreshold() const
bool isLegalNTStoreLoad(Type *DataType, Align Alignment)
unsigned getGISelRematGlobalCost() const
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
bool prefersVectorizedAddressing() const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
bool isLegalMaskedStore(Type *DataType, Align Alignment)
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
bool enableScalableVectorization() const
bool enableOrderedReductions() const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind)
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
bool shouldExpandReduction(const IntrinsicInst *II) const
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
unsigned getMinVectorRegisterBitWidth() const
bool enableSelectOptimize()
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
bool isElementTypeLegalForScalableVector(Type *Ty) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment)
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool useNeonVector(const Type *Ty) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool isLegalMaskedLoad(Type *DataType, Align Alignment)
AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool isLegalNTLoad(Type *DataType, Align Alignment)
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const
bool isLegalMaskedGatherScatter(Type *DataType) const
unsigned getMaxInterleaveFactor(ElementCount VF)
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
unsigned getNumberOfRegisters(unsigned ClassID) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
bool isVScaleKnownToBeAPowerOfTwo() const
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool enableInterleavedAccessVectorization()
bool isLegalNTStore(Type *DataType, Align Alignment)
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind)
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt)
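None of these members is normally called directly. Clients such as the loop vectorizer and unroller go through the target-independent TargetTransformInfo analysis, which forwards each query to AArch64TTIImpl when compiling for AArch64. Below is a minimal sketch of such a query; CostQueryPass is a hypothetical pass written for this example and assumes an LLVM development setup.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical function pass that queries the cost-model hooks listed
// above through the target-independent TargetTransformInfo facade.
struct CostQueryPass : PassInfoMixin<CostQueryPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    const TargetTransformInfo &TTIRes = FAM.getResult<TargetIRAnalysis>(F);

    // Dispatches to AArch64TTIImpl::getNumberOfRegisters; ClassID == 1
    // selects the vector register class in this target's convention.
    unsigned VecRegs = TTIRes.getNumberOfRegisters(/*ClassID=*/1);

    // Dispatches to AArch64TTIImpl::getArithmeticInstrCost: cost of a
    // 64-bit integer add at reciprocal-throughput granularity.
    Type *I64 = Type::getInt64Ty(F.getContext());
    InstructionCost AddCost = TTIRes.getArithmeticInstrCost(
        Instruction::Add, I64, TargetTransformInfo::TCK_RecipThroughput);

    (void)VecRegs;
    (void)AddCost;
    return PreservedAnalyses::all();
  }
};

This facade is what keeps the vectorizer target-agnostic while still honoring AArch64-specific answers such as the ClassID == 1 vector-register convention visible in getNumberOfRegisters above.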