#define DEBUG_TYPE "hexagontti"
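// Descriptions of the Hexagon-specific command-line options; the cl::opt
// declarations themselves are elided from this excerpt.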
    cl::desc("Enable auto-vectorization of floating point types on v68."));
    cl::desc("Control lookup table emission on Hexagon target"));
bool HexagonTTIImpl::useHVX() const {
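// isHVXVectorType: a vector type qualifies for HVX costing only if the
// subtarget accepts it; floating-point element types additionally depend on
// HVX v68/v69 support (see the check below).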
bool HexagonTTIImpl::isHVXVectorType(Type *Ty) const {
  auto *VecTy = dyn_cast<VectorType>(Ty);
  if (ST.useHVXV69Ops() || !VecTy->getElementType()->isFloatingPointTy())
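// getTypeNumElements: number of elements for fixed vectors; scalar types are
// asserted below (the scalar return value is elided in this excerpt).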
unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    return VTy->getNumElements();
69 "Expecting scalar type");
  if (L && L->isInnermost() && canPeel(L) &&
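// The two returns below appear to come from getNumberOfRegisters and
// getMaxInterleaveFactor: 32 HVX vector registers and an interleave factor of
// 2 when HVX is in use, scalar defaults otherwise.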
    return useHVX() ? 32 : 0;
  return useHVX() ? 2 : 1;
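// getMinimumVF: Hexagon only reports fixed vectorization factors; scalable
// VFs are rejected outright.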
bool IsScalable) const {
  assert(!IsScalable && "Scalable VFs are not supported for Hexagon");
  if (ICA.getID() == Intrinsic::bswap) {
    std::pair<InstructionCost, MVT> LT =
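// getMemoryOpCost: loads of HVX vectors are costed per vector register; if the
// access is narrower or under-aligned, the cost is derived from the number of
// loads implied by the achievable alignment.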
  assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
  if (Opcode == Instruction::Store)
  if (Src->isVectorTy()) {
    if (isHVXVectorType(VecTy)) {
      assert(RegWidth && "Non-zero vector register width expected");
      if (VecWidth % RegWidth == 0)
        return VecWidth / RegWidth;
      const Align RegAlign(RegWidth / 8);
      if (!Alignment || *Alignment > RegAlign)
        Alignment = RegAlign;
      unsigned AlignWidth = 8 * Alignment->value();
      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
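    // Non-HVX vectors: the cost scales with the number of loads implied by the
    // bounded alignment (the alignment appears to be capped at 8 bytes).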
    unsigned AlignWidth = 8 * BoundAlignment.value();
    unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
    if (Alignment == Align(4) || Alignment == Align(8))
      return Cost * NumLoads;
    unsigned LogA = Log2(BoundAlignment);
    return (3 - LogA) * Cost * NumLoads;
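// The two parameter fragments below appear to be from getShuffleCost and
// getGatherScatterOpCost; both bodies are elided in this excerpt.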
int Index, Type *SubTp,
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
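// getInterleavedMemoryOpCost: defer to the base implementation unless the
// whole interleave group is accessed without masking; the simple case then
// appears to be costed as an ordinary memory operation.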
    bool UseMaskForCond, bool UseMaskForGaps) {
  if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps)
                                             UseMaskForCond, UseMaskForGaps);
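// getCmpSelInstrCost: floating-point vector compares are charged FloatFactor
// per element on top of the legalization cost.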
    if (Opcode == Instruction::FCmp)
      return LT.first + FloatFactor * getTypeNumElements(ValTy);
                                   Op1Info, Op2Info, I);
                                         Op2Info, Args, CxtI);
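// In getArithmeticInstrCost: floating-point vector arithmetic is likewise
// charged FloatFactor per element once the type has been legalized.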
    if (LT.second.isFloatingPoint())
      return LT.first + FloatFactor * getTypeNumElements(Ty);
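// Likely from getCastInstrCost: casts involving non-HVX floating-point vector
// types are filtered out up front, and FP casts are otherwise charged
// FloatFactor per source and destination element.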
  auto isNonHVXFP = [this](Type *Ty) {
  if (isNonHVXFP(SrcTy) || isNonHVXFP(DstTy))
        std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
      return Cost == 0 ? 0 : 1;
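// getVectorInstrCost: inserting at element 0 is free, while inserting at any
// other index costs 2; the extract-element case is handled separately below.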
                                                   unsigned Index, Value *Op0,
  Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
  if (Opcode == Instruction::InsertElement) {
    unsigned Cost = (Index != 0) ? 2 : 0;
  if (Opcode == Instruction::ExtractElement)
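// This lambda recognizes an integer cast that widens a single-use load to 32
// bits; such a cast can be folded into the load and is therefore treated as
// free when costing the user instruction.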
  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
    if (!CI->isIntegerCast())
    if (DBW != 32 || SBW >= DBW)
    const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
  if (const CastInst *CI = dyn_cast<const CastInst>(U))
    if (isCastFoldedIntoLoad(CI))