14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
55 if (!isa<Constant>(Operand))
68 return SI.getNumCases();
114 std::pair<const Value *, unsigned>
116 return std::make_pair(
nullptr, -1);
125 assert(
F &&
"A concrete function must be provided to this routine.");
132 if (
F->isIntrinsic())
135 if (
F->hasLocalLinkage() || !
F->hasName())
141 if (
Name ==
"copysign" ||
Name ==
"copysignf" ||
Name ==
"copysignl" ||
151 Name ==
"exp2l" ||
Name ==
"exp2f" ||
Name ==
"floor" ||
152 Name ==
"floorf" ||
Name ==
"ceil" ||
Name ==
"round" ||
184 std::optional<Value *>
187 bool &KnownBitsComputed)
const {
195 SimplifyAndSetOp)
const {
211 bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
215 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
281 Align Alignment)
const {
305 int64_t BaseOffset,
bool HasBaseReg,
307 unsigned AddrSpace)
const {
// Takes no arguments and unconditionally returns false.
// NOTE(review): presumably a conservative default that concrete targets
// override -- confirm against the enclosing class, which is not visible here.
321 bool useAA()
const {
return false; }
336 const APInt &DemandedElts,
337 bool Insert,
bool Extract,
362 bool IsZeroCmp)
const {
376 unsigned *
Fast)
const {
426 return "Generic::Unknown Register Class";
428 return "Generic::ScalarRC";
430 return "Generic::VectorRC";
// Always returns 0; neither ElemWidth nor Opcode is read by this body.
// NOTE(review): 0 presumably means "no target-specific maximum" -- confirm
// against the callers, which are not visible here.
453 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const {
return 0; }
457 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
458 AllowPromotionWithoutCommonHeader =
false;
463 std::optional<unsigned>
474 std::optional<unsigned>
488 unsigned NumStridedMemAccesses,
489 unsigned NumPrefetches,
bool HasCall)
const {
505 auto IsWidenableCondition = [](
const Value *V) {
506 if (
auto *II = dyn_cast<IntrinsicInst>(V))
507 if (II->
getIntrinsicID() == Intrinsic::experimental_widenable_condition)
516 case Instruction::FDiv:
517 case Instruction::FRem:
518 case Instruction::SDiv:
519 case Instruction::SRem:
520 case Instruction::UDiv:
521 case Instruction::URem:
524 case Instruction::And:
525 case Instruction::Or:
526 if (
any_of(Args, IsWidenableCondition))
553 case Instruction::IntToPtr: {
554 unsigned SrcSize = Src->getScalarSizeInBits();
560 case Instruction::PtrToInt: {
561 unsigned DstSize = Dst->getScalarSizeInBits();
567 case Instruction::BitCast:
568 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
572 case Instruction::Trunc: {
586 unsigned Index)
const {
615 unsigned Index)
const {
620 const APInt &DemandedDstElts,
647 const Value *
Ptr,
bool VariableMask,
657 bool UseMaskForCond,
bool UseMaskForGaps)
const {
663 switch (ICA.
getID()) {
666 case Intrinsic::annotation:
667 case Intrinsic::assume:
668 case Intrinsic::sideeffect:
669 case Intrinsic::pseudoprobe:
670 case Intrinsic::arithmetic_fence:
671 case Intrinsic::dbg_assign:
672 case Intrinsic::dbg_declare:
673 case Intrinsic::dbg_value:
674 case Intrinsic::dbg_label:
675 case Intrinsic::invariant_start:
676 case Intrinsic::invariant_end:
677 case Intrinsic::launder_invariant_group:
678 case Intrinsic::strip_invariant_group:
679 case Intrinsic::is_constant:
680 case Intrinsic::lifetime_start:
681 case Intrinsic::lifetime_end:
682 case Intrinsic::experimental_noalias_scope_decl:
683 case Intrinsic::objectsize:
684 case Intrinsic::ptr_annotation:
685 case Intrinsic::var_annotation:
686 case Intrinsic::experimental_gc_result:
687 case Intrinsic::experimental_gc_relocate:
688 case Intrinsic::coro_alloc:
689 case Intrinsic::coro_begin:
690 case Intrinsic::coro_free:
691 case Intrinsic::coro_end:
692 case Intrinsic::coro_frame:
693 case Intrinsic::coro_size:
694 case Intrinsic::coro_align:
695 case Intrinsic::coro_suspend:
696 case Intrinsic::coro_subfn_addr:
697 case Intrinsic::threadlocal_address:
698 case Intrinsic::experimental_widenable_condition:
715 const SCEV *)
const {
720 std::optional<FastMathFlags> FMF,
732 std::optional<FastMathFlags> FMF,
761 Type *ExpectedType)
const {
767 unsigned SrcAddrSpace,
unsigned DestAddrSpace,
768 unsigned SrcAlign,
unsigned DestAlign,
769 std::optional<uint32_t> AtomicElementSize)
const {
776 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
777 unsigned SrcAlign,
unsigned DestAlign,
778 std::optional<uint32_t> AtomicCpySize)
const {
779 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
781 for (
unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
787 return (Caller->getFnAttribute(
"target-cpu") ==
788 Callee->getFnAttribute(
"target-cpu")) &&
789 (Caller->getFnAttribute(
"target-features") ==
790 Callee->getFnAttribute(
"target-features"));
795 return (Caller->getFnAttribute(
"target-cpu") ==
796 Callee->getFnAttribute(
"target-cpu")) &&
797 (Caller->getFnAttribute(
"target-features") ==
798 Callee->getFnAttribute(
"target-features"));
818 unsigned AddrSpace)
const {
823 unsigned AddrSpace)
const {
835 unsigned ChainSizeInBytes,
841 unsigned ChainSizeInBytes,
871 Align Alignment)
const {
888 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
889 const auto *VectorValue = cast<Constant>(Val);
893 auto *VT = cast<FixedVectorType>(Val->
getType());
899 unsigned MaxRequiredSize =
900 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
902 unsigned MinRequiredSize = 0;
903 for (
unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
904 if (
auto *IntElement =
905 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
906 bool signedElement = IntElement->getValue().isNegative();
908 unsigned ElementMinRequiredSize =
909 IntElement->getValue().getSignificantBits() - 1;
913 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
916 return MaxRequiredSize;
919 return MinRequiredSize;
922 if (
const auto *CI = dyn_cast<ConstantInt>(Val)) {
923 isSigned = CI->getValue().isNegative();
924 return CI->getValue().getSignificantBits() - 1;
927 if (
const auto *Cast = dyn_cast<SExtInst>(Val)) {
929 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
932 if (
const auto *Cast = dyn_cast<ZExtInst>(Val)) {
934 return Cast->getSrcTy()->getScalarSizeInBits();
942 return Ptr && isa<SCEVAddRecExpr>(
Ptr);
954 int64_t MergeDistance)
const {
982 assert(PointeeType &&
Ptr &&
"can't get GEPCost of nullptr");
983 assert(cast<PointerType>(
Ptr->getType()->getScalarType())
984 ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
985 "explicit pointee type doesn't match operand's pointee type");
986 auto *BaseGV = dyn_cast<GlobalValue>(
Ptr->stripPointerCasts());
987 bool HasBaseReg = (BaseGV ==
nullptr);
990 APInt BaseOffset(PtrSizeBits, 0);
994 Type *TargetType =
nullptr;
1002 TargetType = GTI.getIndexedType();
1005 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*
I);
1008 ConstIdx = dyn_cast<ConstantInt>(Splat);
1009 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1011 assert(ConstIdx &&
"Unexpected GEP index");
1017 if (isa<ScalableVectorType>(TargetType))
1019 int64_t ElementSize =
1029 Scale = ElementSize;
1037 Ptr->getType()->getPointerAddressSpace()))
1057 for (
const Value *V : Ptrs) {
1058 const auto *
GEP = dyn_cast<GetElementPtrInst>(V);
1061 if (
Info.isSameBase() && V !=
Base) {
1062 if (
GEP->hasAllConstantIndices())
1071 GEP->getPointerOperand(),
1083 auto *TargetTTI =
static_cast<T *
>(
this);
1086 auto *CB = dyn_cast<CallBase>(U);
1087 if (CB && !isa<IntrinsicInst>(U)) {
1088 if (
const Function *
F = CB->getCalledFunction()) {
1089 if (!TargetTTI->isLoweredToCall(
F))
1098 Type *Ty = U->getType();
1100 auto *
I = dyn_cast<Instruction>(U);
1104 case Instruction::Call: {
1105 assert(isa<IntrinsicInst>(U) &&
"Unexpected non-intrinsic call");
1106 auto *Intrinsic = cast<IntrinsicInst>(U);
1108 return TargetTTI->getIntrinsicInstrCost(CostAttrs,
CostKind);
1110 case Instruction::Br:
1111 case Instruction::Ret:
1112 case Instruction::PHI:
1113 case Instruction::Switch:
1114 return TargetTTI->getCFInstrCost(Opcode,
CostKind,
I);
1115 case Instruction::ExtractValue:
1116 case Instruction::Freeze:
1118 case Instruction::Alloca:
1119 if (cast<AllocaInst>(U)->isStaticAlloca())
1122 case Instruction::GetElementPtr: {
1123 const auto *
GEP = cast<GEPOperator>(U);
1124 return TargetTTI->getGEPCost(
GEP->getSourceElementType(),
1125 GEP->getPointerOperand(),
1128 case Instruction::Add:
1129 case Instruction::FAdd:
1130 case Instruction::Sub:
1131 case Instruction::FSub:
1132 case Instruction::Mul:
1133 case Instruction::FMul:
1134 case Instruction::UDiv:
1135 case Instruction::SDiv:
1136 case Instruction::FDiv:
1137 case Instruction::URem:
1138 case Instruction::SRem:
1139 case Instruction::FRem:
1140 case Instruction::Shl:
1141 case Instruction::LShr:
1142 case Instruction::AShr:
1143 case Instruction::And:
1144 case Instruction::Or:
1145 case Instruction::Xor:
1146 case Instruction::FNeg: {
1149 if (Opcode != Instruction::FNeg)
1152 return TargetTTI->getArithmeticInstrCost(Opcode, Ty,
CostKind, Op1Info,
1155 case Instruction::IntToPtr:
1156 case Instruction::PtrToInt:
1157 case Instruction::SIToFP:
1158 case Instruction::UIToFP:
1159 case Instruction::FPToUI:
1160 case Instruction::FPToSI:
1161 case Instruction::Trunc:
1162 case Instruction::FPTrunc:
1163 case Instruction::BitCast:
1164 case Instruction::FPExt:
1165 case Instruction::SExt:
1166 case Instruction::ZExt:
1167 case Instruction::AddrSpaceCast: {
1168 Type *OpTy = U->getOperand(0)->getType();
1169 return TargetTTI->getCastInstrCost(
1172 case Instruction::Store: {
1173 auto *
SI = cast<StoreInst>(U);
1174 Type *ValTy = U->getOperand(0)->getType();
1176 return TargetTTI->getMemoryOpCost(Opcode, ValTy,
SI->getAlign(),
1180 case Instruction::Load: {
1184 auto *LI = cast<LoadInst>(U);
1185 Type *LoadType = U->getType();
1195 if (
const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1196 LoadType = TI->getDestTy();
1198 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1200 {TTI::OK_AnyValue, TTI::OP_None},
I);
1202 case Instruction::Select: {
1203 const Value *Op0, *Op1;
1214 return TargetTTI->getArithmeticInstrCost(
1218 Type *CondTy = U->getOperand(0)->getType();
1219 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1223 case Instruction::ICmp:
1224 case Instruction::FCmp: {
1225 Type *ValTy = U->getOperand(0)->getType();
1227 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1228 I ? cast<CmpInst>(
I)->getPredicate()
1232 case Instruction::InsertElement: {
1233 auto *IE = dyn_cast<InsertElementInst>(U);
1237 if (
auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
1238 if (CI->getValue().getActiveBits() <= 32)
1239 Idx = CI->getZExtValue();
1240 return TargetTTI->getVectorInstrCost(*IE, Ty,
CostKind,
Idx);
1242 case Instruction::ShuffleVector: {
1243 auto *
Shuffle = dyn_cast<ShuffleVectorInst>(U);
1247 auto *VecTy = cast<VectorType>(U->getType());
1248 auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
1249 int NumSubElts, SubIndex;
1251 if (
Shuffle->changesLength()) {
1253 if (
Shuffle->increasesLength() &&
Shuffle->isIdentityWithPadding())
1256 if (
Shuffle->isExtractSubvectorMask(SubIndex))
1261 if (
Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1262 return TargetTTI->getShuffleCost(
1268 int ReplicationFactor, VF;
1269 if (
Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1270 APInt DemandedDstElts =
1274 DemandedDstElts.
setBit(
I.index());
1276 return TargetTTI->getReplicationShuffleCost(
1277 VecSrcTy->getElementType(), ReplicationFactor, VF,
1302 if (
Shuffle->isZeroEltSplat())
1307 if (
Shuffle->isSingleSource())
1312 if (
Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1313 return TargetTTI->getShuffleCost(
1318 if (
Shuffle->isSplice(SubIndex))
1327 case Instruction::ExtractElement: {
1328 auto *EEI = dyn_cast<ExtractElementInst>(U);
1332 if (
auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
1333 if (CI->getValue().getActiveBits() <= 32)
1334 Idx = CI->getZExtValue();
1335 Type *DstTy = U->getOperand(0)->getType();
1336 return TargetTTI->getVectorInstrCost(*EEI, DstTy,
CostKind,
Idx);
1346 auto *TargetTTI =
static_cast<T *
>(
this);
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isSigned(unsigned int Opcode)
mir Rename Register Operands
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Class for arbitrary precision integers.
void setBit(unsigned BitPosition)
Set the bit whose position is given as "BitPosition" to 1.
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
The core instruction combiner logic.
Drive the analysis of interleaved memory accesses in the loop.
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
uint64_t getElementOffset(unsigned Idx) const
Class to represent struct types.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
bool match(Val *V, const Pattern &P)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr int UndefMaskElem
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr unsigned BitWidth
gep_type_iterator gep_type_begin(const User *GEP)
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.