#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
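// Conservative default implementations of the TargetTransformInfo query
// interface. Concrete targets subclass this base and override only the
// hooks they can answer more precisely.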
    if (!isa<Constant>(Operand))

    return SI.getNumCases();
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }
  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;
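    // Well-known libm entry points are assumed to lower to a single
    // selection-DAG node rather than a real call, so they are not treated
    // as "lowered to call".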
    StringRef Name = F->getName();

    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "atan2" || Name == "atan2f" || Name == "atan2l" ||
        Name == "exp10" || Name == "exp10l" || Name == "exp10f" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round")
      return false;
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(/* ... */ bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *>
  simplifyDemandedVectorEltsIntrinsic(/* ... */ SimplifyAndSetOp) const {
    return std::nullopt;
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) const {
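    // Conservative default: only plain "reg" and "reg + reg" addressing is
    // assumed legal; global bases, immediates, and other scales are not.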
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }
                                  Align Alignment) const {

                                  Align Alignment, unsigned AddrSpace) {

                                       unsigned AddrSpace) const {
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, nullptr,
  bool useAA() const { return false; }

                                        unsigned ScalarOpdIdx) const {

                                        const APInt &DemandedElts,
                                        bool Insert, bool Extract,

                                        bool IsZeroCmp) const {

    return isa<SelectInst>(I) &&

                                        unsigned *Fast) const {
    default: return "Generic::Unknown Register Class";
    case 0:  return "Generic::ScalarRC";
    case 1:  return "Generic::VectorRC";
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
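  // Returning 0 means "no target-imposed maximum VF". A hedged sketch of a
  // target override (hypothetical 512-bit target, not from this file):
  //   unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
  //     return ElemWidth ? 512 / ElemWidth : 0;
  //   }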
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
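    // Widenable conditions lower to constants once guards are widened, so
    // bitwise ops consuming them can be treated as free below.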
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();

    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();

    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        return TTI::TCC_Free;

    case Instruction::Trunc: {
                              unsigned Index) const {

      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {

                              unsigned Index) const {

                              const APInt &DemandedDstElts,
                                  const Value *Ptr, bool VariableMask,

                                  const Value *Ptr, bool VariableMask,

      bool UseMaskForCond, bool UseMaskForGaps) const {
    switch (ICA.getID()) {
    case Intrinsic::experimental_vector_histogram_add:
      return InstructionCost::getInvalid();
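    // The histogram intrinsic requires explicit target support; everything
    // below is assumed to vanish during lowering and is therefore free.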
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_begin_custom_abi:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      return 0;
    }
  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

                             std::optional<FastMathFlags> FMF,

                             Type *ExpectedType) const {
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }
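  // Residual (tail) lowering below: one integer op per remaining chunk,
  // where the chunk width is the atomic element size if provided, otherwise
  // a single byte.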
                                         unsigned RemainingBytes,
                                         unsigned SrcAddrSpace,
                                         unsigned DestAddrSpace,
                                         std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }
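  // Default inlining compatibility: caller and callee must agree exactly on
  // target-cpu and target-features.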
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }
                                   unsigned AddrSpace) const {

                                   unsigned AddrSpace) const {

                                unsigned ChainSizeInBytes,

                                unsigned ChainSizeInBytes,

                                               Align Alignment) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      auto *VT = cast<FixedVectorType>(Val->getType());

      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          isSigned |= signedElement;
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }
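    // Scalar cases: constants need only their significant bits (the sign
    // bit is implied for negatives), and sext/zext expose the width of
    // their source.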
    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
                               int64_t MergeDistance) const {
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
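  // CRTP: static_cast<T *>(this) dispatches into the concrete target's
  // implementation without virtual-call overhead.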
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    APInt BaseOffset(PtrSizeBits, 0);

    Type *TargetType = nullptr;
        TargetType = GTI.getIndexedType();

      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto *Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        assert(ConstIdx && "Unexpected GEP index");

        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();

          Scale = ElementSize;

      AccessType = TargetType;

          Ptr->getType()->getPointerAddressSpace()))
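    // The address-space argument above lets the target validate the GEP's
    // (base, offset, scale) decomposition as a legal addressing mode.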
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);

      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())

            GEP->getPointerOperand(),
    auto *TargetTTI = static_cast<T *>(this);
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic;
    Type *Ty = U->getType();

    auto *I = dyn_cast<Instruction>(U);
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
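    // GEPs are free when they fold into the addressing mode of their memory
    // access; getGEPCost decides using the single user's access type.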
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
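    // Every cast flavor funnels into getCastInstrCost; the cast context
    // hint tells targets when an extend or truncate folds into a
    // neighboring memory operation.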
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      if (I && LI->hasOneUse())
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
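    // Selects matching a logical and/or pattern are priced as the
    // equivalent And/Or arithmetic; other selects go through
    // getCmpSelInstrCost.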
    case Instruction::Select: {
      const Value *Op0, *Op1;

        return TargetTTI->getArithmeticInstrCost(

      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic;
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
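    // Shuffles are classified by mask shape (identity, reverse, select,
    // transpose, splat, subvector insert/extract, splice) so each kind can
    // be priced separately.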
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic;
      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      ArrayRef<int> Mask = Shuffle->getShuffleMask();
      int NumSubElts, SubIndex;
      if (Shuffle->changesLength()) {
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts = APInt::getZero(Mask.size());
          for (auto I : enumerate(Mask))
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }
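        // Length-changing shuffles are modeled as a free widening of the
        // sources plus one same-length shuffle at the widened type.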
        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
        SmallVector<int, 16> AdjustMask(Mask.begin(), Mask.end());

        if (Shuffle->increasesLength()) {
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
          return TargetTTI->getShuffleCost(

        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 TTI::SK_ExtractSubvector, VecSrcTy,
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())

      if (Shuffle->isSelect())

      if (Shuffle->isTranspose())

      if (Shuffle->isZeroEltSplat())

      if (Shuffle->isSingleSource())

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
                                         SubIndex, nullptr, Operands, Shuffle);
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic;
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    auto *TargetTTI = static_cast<T *>(this);

#endif // LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H