21 #define DEBUG_TYPE "aarch64tti"
36 return (64 - LZ + 15) / 16;
50 ImmVal = Imm.
sext((BitSize + 63) & ~0x3fU);
55 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
61 return std::max(1, Cost);
74 unsigned ImmIdx = ~0U;
78 case Instruction::GetElementPtr:
87 case Instruction::Sub:
88 case Instruction::Mul:
89 case Instruction::UDiv:
90 case Instruction::SDiv:
91 case Instruction::URem:
92 case Instruction::SRem:
96 case Instruction::ICmp:
100 case Instruction::Shl:
101 case Instruction::LShr:
102 case Instruction::AShr:
106 case Instruction::Trunc:
107 case Instruction::ZExt:
108 case Instruction::SExt:
109 case Instruction::IntToPtr:
110 case Instruction::PtrToInt:
111 case Instruction::BitCast:
112 case Instruction::PHI:
121 int NumConstants = (BitSize + 63) / 64;
143 case Intrinsic::sadd_with_overflow:
144 case Intrinsic::uadd_with_overflow:
145 case Intrinsic::ssub_with_overflow:
146 case Intrinsic::usub_with_overflow:
147 case Intrinsic::smul_with_overflow:
148 case Intrinsic::umul_with_overflow:
150 int NumConstants = (BitSize + 63) / 64;
157 case Intrinsic::experimental_stackmap:
161 case Intrinsic::experimental_patchpoint_void:
162 case Intrinsic::experimental_patchpoint_i64:
173 if (TyWidth == 32 || TyWidth == 64)
181 assert(ISD &&
"Invalid opcode");
299 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
307 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) &&
"Invalid type");
321 if (!VecLT.second.isVector() || !TLI->
isTypeLegal(DstVT))
326 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
335 case Instruction::SExt:
340 case Instruction::ZExt:
341 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
358 if (!LT.second.isVector())
362 unsigned Width = LT.second.getVectorNumElements();
363 Index = Index % Width;
408 Opd1PropInfo, Opd2PropInfo);
426 unsigned NumVectorInstToHideOverhead = 10;
427 int MaxMergeDistance = 64;
431 return NumVectorInstToHideOverhead;
446 const int AmortizationCost = 20;
448 VectorSelectTbl[] = {
474 LT.second.is128BitVector() && Alignment < 16) {
480 const int AmortizationCost = 6;
482 return LT.first * 2 * AmortizationCost;
490 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
492 return NumVectorizableInstsToAmortize * NumVecElts * 2;
503 assert(Factor >= 2 &&
"Invalid interleave factor");
504 assert(isa<VectorType>(VecTy) &&
"Expect a vector type");
506 if (Factor <= TLI->getMaxSupportedInterleaveFactor()) {
512 if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128))
517 Alignment, AddressSpace);
522 for (
auto *
I : Tys) {
523 if (!
I->isVectorTy())
525 if (
I->getScalarSizeInBits() *
I->getVectorNumElements() == 128)
552 Type *ExpectedType) {
556 case Intrinsic::aarch64_neon_st2:
557 case Intrinsic::aarch64_neon_st3:
558 case Intrinsic::aarch64_neon_st4: {
566 for (
unsigned i = 0, e = NumElts;
i != e; ++
i) {
572 for (
unsigned i = 0, e = NumElts;
i != e; ++
i) {
578 case Intrinsic::aarch64_neon_ld2:
579 case Intrinsic::aarch64_neon_ld3:
580 case Intrinsic::aarch64_neon_ld4:
581 if (Inst->
getType() == ExpectedType)
592 case Intrinsic::aarch64_neon_ld2:
593 case Intrinsic::aarch64_neon_ld3:
594 case Intrinsic::aarch64_neon_ld4:
601 case Intrinsic::aarch64_neon_st2:
602 case Intrinsic::aarch64_neon_st3:
603 case Intrinsic::aarch64_neon_st4:
615 case Intrinsic::aarch64_neon_ld2:
616 case Intrinsic::aarch64_neon_st2:
619 case Intrinsic::aarch64_neon_ld3:
620 case Intrinsic::aarch64_neon_st3:
623 case Intrinsic::aarch64_neon_ld4:
624 case Intrinsic::aarch64_neon_st4:
APInt ashr(unsigned shiftAmt) const
Arithmetic right-shift function.
unsigned getVectorInsertExtractBaseCost() const
Cost tables and simple lookup functions.
unsigned getPrefetchDistance()
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
The main scalar evolution driver.
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
bool IsSimple
True only if this memory operation is non-volatile, non-atomic, and unordered.
Type Conversion Cost Table.
unsigned getMaxInterleaveFactor(unsigned VF)
unsigned getMinPrefetchStride()
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src)
unsigned short MatchingId
Class to represent struct types.
unsigned getNumArgOperands() const
Return the number of call arguments.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
const TypeConversionCostTblEntry * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntry > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table, TypeTy must be comparable to CompareTy by ==. ...
Type * getVectorElementType() const
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
unsigned getMinPrefetchStride() const
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Simple integer binary arithmetic operators.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
unsigned getPrefetchDistance() const
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Type * getScalarType() const LLVM_READONLY
If this is a vector type, return the element type, otherwise return 'this'.
Type * getElementType() const
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy)
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
int getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
The instances of the Type class are immutable: once they are created, they are never changed...
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
bool isVectorTy() const
True if this is an instance of VectorType.
Type * getElementType(unsigned N) const
unsigned getMaxPrefetchIterationsAhead() const
int64_t getSExtValue() const
Get sign extended value.
unsigned getMaxPrefetchIterationsAhead()
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
APInt Xor(const APInt &LHS, const APInt &RHS)
Bitwise XOR function for APInt.
APInt sext(unsigned width) const
Sign extend to a new width.
unsigned getBitWidth() const
Return the number of bits in the APInt.
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
bool isMisaligned128StoreSlow() const
EVT - Extended Value Type.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
unsigned getCacheLineSize()
Type * getType() const
All values are typed, get the type of this value.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy)
unsigned getMaxInterleaveFactor() const
unsigned getCacheLineSize() const
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
Class to represent vector types.
Class for arbitrary precision integers.
Select(COND, TRUEVAL, FALSEVAL).
bool isIntegerTy() const
True if this is an instance of IntegerType.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
unsigned getVectorNumElements() const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
Bitwise operators - logical and, logical or, logical xor.
This class represents an analyzed expression in the program.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src)
int getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys)
Represents a single loop in the control flow graph.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP)
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
TRUNCATE - Completely drop the high bits.
unsigned getNumElements() const
Random access to the elements.
unsigned getLoopDepth() const
Return the nesting level of this loop.
Information about a load/store intrinsic defined by the target.
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
A wrapper class for inspecting calls to intrinsic functions.
This file describes how to lower LLVM code to machine code.