22 #define DEBUG_TYPE "aarch64tti" 38 return (CallerBits & CalleeBits) == CalleeBits;
69 ImmVal = Imm.
sext((BitSize + 63) & ~0x3fU);
74 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
93 unsigned ImmIdx = ~0U;
97 case Instruction::GetElementPtr:
106 case Instruction::Sub:
107 case Instruction::Mul:
108 case Instruction::UDiv:
109 case Instruction::SDiv:
110 case Instruction::URem:
111 case Instruction::SRem:
112 case Instruction::And:
113 case Instruction::Or:
114 case Instruction::Xor:
115 case Instruction::ICmp:
119 case Instruction::Shl:
120 case Instruction::LShr:
121 case Instruction::AShr:
125 case Instruction::Trunc:
126 case Instruction::ZExt:
127 case Instruction::SExt:
128 case Instruction::IntToPtr:
129 case Instruction::PtrToInt:
130 case Instruction::BitCast:
131 case Instruction::PHI:
140 int NumConstants = (BitSize + 63) / 64;
162 case Intrinsic::sadd_with_overflow:
163 case Intrinsic::uadd_with_overflow:
164 case Intrinsic::ssub_with_overflow:
165 case Intrinsic::usub_with_overflow:
166 case Intrinsic::smul_with_overflow:
167 case Intrinsic::umul_with_overflow:
169 int NumConstants = (BitSize + 63) / 64;
176 case Intrinsic::experimental_stackmap:
180 case Intrinsic::experimental_patchpoint_void:
181 case Intrinsic::experimental_patchpoint_i64:
192 if (TyWidth == 32 || TyWidth == 64)
198 bool AArch64TTIImpl::isWideningInstruction(
Type *DstTy,
unsigned Opcode,
203 auto toVectorTy = [&](
Type *ArgTy) {
222 case Instruction::Sub:
232 if (Args.
size() != 2 ||
233 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
236 auto *Extend = cast<CastInst>(Args[1]);
241 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
247 Type *SrcTy = toVectorTy(Extend->getSrcTy());
249 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
254 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
255 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
259 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
265 assert(ISD &&
"Invalid opcode");
270 auto *SingleUser = cast<Instruction>(*I->
user_begin());
272 if (isWideningInstruction(Dst, SingleUser->getOpcode(),
Operands)) {
280 if (
auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
282 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
403 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
411 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) &&
"Invalid type");
425 if (!VecLT.second.isVector() || !TLI->
isTypeLegal(DstVT))
430 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
439 case Instruction::SExt:
444 case Instruction::ZExt:
445 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
462 if (!LT.second.isVector())
466 unsigned Width = LT.second.getVectorNumElements();
467 Index = Index %
Width;
492 if (isWideningInstruction(Ty, Opcode, Args))
500 Opd1PropInfo, Opd2PropInfo);
542 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
547 Opd1PropInfo, Opd2PropInfo);
552 Opd2Info, Opd1PropInfo, Opd2PropInfo);
554 Opd2Info, Opd1PropInfo, Opd2PropInfo);
568 return (Cost + 1) * LT.first;
578 unsigned NumVectorInstToHideOverhead = 10;
579 int MaxMergeDistance = 64;
583 return NumVectorInstToHideOverhead;
598 const int AmortizationCost = 20;
600 VectorSelectTbl[] = {
640 LT.second.is128BitVector() && Alignment < 16) {
646 const int AmortizationCost = 6;
648 return LT.first * 2 * AmortizationCost;
652 unsigned ProfitableNumElements;
655 ProfitableNumElements = 4;
659 ProfitableNumElements = 8;
663 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
665 return NumVectorizableInstsToAmortize * NumVecElts * 2;
678 bool UseMaskForGaps) {
679 assert(Factor >= 2 &&
"Invalid interleave factor");
680 assert(isa<VectorType>(VecTy) &&
"Expect a vector type");
682 if (!UseMaskForCond && !UseMaskForGaps &&
683 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
690 if (NumElts % Factor == 0 &&
696 Alignment, AddressSpace,
697 UseMaskForCond, UseMaskForGaps);
702 for (
auto *
I : Tys) {
703 if (!
I->isVectorTy())
705 if (
I->getScalarSizeInBits() *
I->getVectorNumElements() == 128)
723 enum { MaxStridedLoads = 7 };
725 int StridedLoads = 0;
728 for (
const auto BB : L->
blocks()) {
729 for (
auto &
I : *BB) {
740 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
749 if (StridedLoads > MaxStridedLoads / 2)
756 int StridedLoads = countStridedLoads(L, SE);
758 <<
" strided loads\n");
788 Type *ExpectedType) {
792 case Intrinsic::aarch64_neon_st2:
793 case Intrinsic::aarch64_neon_st3:
794 case Intrinsic::aarch64_neon_st4: {
802 for (
unsigned i = 0,
e = NumElts; i !=
e; ++i) {
808 for (
unsigned i = 0,
e = NumElts; i !=
e; ++i) {
814 case Intrinsic::aarch64_neon_ld2:
815 case Intrinsic::aarch64_neon_ld3:
816 case Intrinsic::aarch64_neon_ld4:
817 if (Inst->
getType() == ExpectedType)
828 case Intrinsic::aarch64_neon_ld2:
829 case Intrinsic::aarch64_neon_ld3:
830 case Intrinsic::aarch64_neon_ld4:
835 case Intrinsic::aarch64_neon_st2:
836 case Intrinsic::aarch64_neon_st3:
837 case Intrinsic::aarch64_neon_st4:
847 case Intrinsic::aarch64_neon_ld2:
848 case Intrinsic::aarch64_neon_st2:
851 case Intrinsic::aarch64_neon_ld3:
852 case Intrinsic::aarch64_neon_st3:
855 case Intrinsic::aarch64_neon_ld4:
856 case Intrinsic::aarch64_neon_st4:
869 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader) {
870 bool Considerable =
false;
871 AllowPromotionWithoutCommonHeader =
false;
872 if (!isa<SExtInst>(&I))
874 Type *ConsideredSExtType =
876 if (I.
getType() != ConsideredSExtType)
886 if (GEPInst->getNumOperands() > 2) {
887 AllowPromotionWithoutCommonHeader =
true;
897 assert(isa<VectorType>(Ty) &&
"Expected Ty to be a vector type");
900 case Instruction::FAdd:
901 case Instruction::FMul:
902 case Instruction::And:
903 case Instruction::Or:
904 case Instruction::Xor:
905 case Instruction::Mul:
909 case Instruction::ICmp:
910 return (ScalarBits < 64) &&
912 case Instruction::FCmp:
921 bool IsPairwiseForm) {
929 assert(ISD &&
"Invalid opcode");
943 return LT.first *
Entry->Cost;
995 return LT.first *
Entry->Cost;
Type * getVectorElementType() const
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >())
APInt sext(unsigned width) const
Sign extend to a new width.
bool isMisaligned128StoreSlow() const
This class represents lattice values for constants.
Type * getElementType(unsigned N) const
Cost tables and simple lookup functions.
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth)
unsigned getLoopDepth() const
Return the nesting level of this loop.
unsigned getNumElements() const
Random access to the elements.
The main scalar evolution driver.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Type Conversion Cost Table.
An instruction for reading from memory.
static IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getMaxInterleaveFactor(unsigned VF)
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
unsigned getBitWidth() const
Return the number of bits in the APInt.
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr)
Value * getArgOperand(unsigned i) const
unsigned short MatchingId
const FeatureBitset & getFeatureBits() const
Class to represent struct types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
const TypeConversionCostTblEntry * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntry > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table, TypeTy must be comparable to CompareTy by ==. ...
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
mir Rename Register Operands
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
int64_t getSExtValue() const
Get sign extended value.
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Type * getType() const
All values are typed, get the type of this value.
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
This node represents a polynomial recurrence on the trip count of the specified loop.
Simple integer binary arithmetic operators.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
unsigned getWideningBaseCost() const
Value * getOperand(unsigned i) const
Analysis containing CSE Info
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info)
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
bool useReductionIntrinsic(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
initializer< Ty > init(const Ty &Val)
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType)
* if(!EatIfPresent(lltok::kw_thread_local)) return false
ParseOptionalThreadLocal := /*empty.
bool requiresStrictAlign() const
Container class for subtarget features.
int getIntImmCost(int64_t Val)
Calculate the cost of materializing a 64-bit value.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
The instances of the Type class are immutable: once they are created, they are never changed...
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
size_t size() const
size - Get the array size.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader)
See if I should be considered for address type promotion.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
Value * getPointerOperand()
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I=nullptr)
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false)
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
const TargetMachine & getTargetMachine() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Align max(MaybeAlign Lhs, Align Rhs)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
int getCostOfKeepingLiveOverCall(ArrayRef< Type *> Tys)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
unsigned getVectorNumElements() const
Class to represent vector types.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
unsigned getMaxInterleaveFactor() const
int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm)
Select(COND, TRUEVAL, FALSEVAL).
unsigned getMaxExpandSizeMemcmp(bool OptSize) const
Get maximum # of load operations permitted for memcmp.
iterator_range< user_iterator > users()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
const CostTblEntry * CostTableLookup(ArrayRef< CostTblEntry > Tbl, int ISD, MVT Ty)
Find in cost table, TypeTy must be comparable to CompareTy by ==.
unsigned getNumArgOperands() const
Bitwise operators - logical and, logical or, logical xor.
This class represents an analyzed expression in the program.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Represents a single loop in the control flow graph.
static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, MachineDominatorTree &MDT, LiveIntervals &LIS)
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
const Function * getParent() const
Return the enclosing method, or null if none.
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
LLVM Value Representation.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Primary interface to the complete machine description for the target machine.
Type * getElementType() const
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
bool hasOneUse() const
Return true if there is exactly one user of this value.
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
TRUNCATE - Completely drop the high bits.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
iterator_range< block_iterator > blocks() const
Information about a load/store intrinsic defined by the target.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
A wrapper class for inspecting calls to intrinsic functions.
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
unsigned getVectorInsertExtractBaseCost() const
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...