#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
// ...
  if (!isa<Constant>(Operand))
  // ...
  return SI.getNumCases();
  // ...
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }
    assert(F && "A concrete function must be provided to this routine.");
    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;
    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        // ...
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        // ...
        )
      return false;
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(/* ... */,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      /* ... */ SimplifyAndSetOp) const {
    return std::nullopt;
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) const {
    // Guess conservatively.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }
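// --- Illustrative sketch (not part of the header) -----------------------
// What the conservative default above accepts.  This dependency-free
// analogue mirrors the predicate so the accepted addressing forms can be
// tabulated; `hasGlobalBase` stands in for BaseGV.
#include <cstdint>
#include <cstdio>

static bool defaultLegalAM(bool hasGlobalBase, int64_t baseOffset,
                           int64_t scale) {
  return !hasGlobalBase && baseOffset == 0 && (scale == 0 || scale == 1);
}

int main() {
  std::printf("reg          -> %d\n", defaultLegalAM(false, 0, 0)); // 1
  std::printf("reg + reg    -> %d\n", defaultLegalAM(false, 0, 1)); // 1
  std::printf("reg + 4      -> %d\n", defaultLegalAM(false, 4, 0)); // 0
  std::printf("global + reg -> %d\n", defaultLegalAM(true, 0, 1));  // 0
  std::printf("reg + 8*reg  -> %d\n", defaultLegalAM(false, 0, 8)); // 0
}
// -------------------------------------------------------------------------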
  // ...
                        Align Alignment) const {
  // ...
                                       unsigned AddrSpace) const {
    // Guess conservatively.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, /*I=*/nullptr,
                              BaseOffset.getScalable()))
      return 0;
    return InstructionCost::getInvalid();
  }
  bool useAA() const { return false; }
  // ...
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
  // ...
                                              bool IsZeroCmp) const {
  // ...
    return isa<SelectInst>(I) &&
  // ...
                                      unsigned *Fast) const {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const { /* ... */ }

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { /* ... */ }
  // ...
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
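// --- Illustrative sketch (not part of the header) -----------------------
// A dependency-free analogue of the classification above: division and
// remainder default to "expensive", while an And/Or over a widenable
// condition is free because guard widening folds it away.  The enum
// mirrors the TTI::TCC_* constants informally.
#include <cassert>

enum Cost { Free = 0, Basic = 1, Expensive = 4 };

static Cost classify(bool isDivRem, bool isAndOr, bool hasWidenableOp) {
  if (isDivRem)
    return Expensive;          // FDiv/FRem/SDiv/SRem/UDiv/URem
  if (isAndOr && hasWidenableOp)
    return Free;               // And/Or feeding a guard
  return Basic;
}

int main() {
  assert(classify(true, false, false) == Expensive);
  assert(classify(false, true, true) == Free);
  assert(classify(false, true, false) == Basic);
}
// -------------------------------------------------------------------------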
    // ...
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      // ... free when SrcSize is a legal integer type no wider than the
      // pointer; otherwise falls through to the generic cost.
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      // ... free when DstSize is a legal integer type at least as wide as
      // the pointer.
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;
      break;
    case Instruction::Trunc: {
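// --- Illustrative sketch (not part of the header) -----------------------
// The elided conditions in the IntToPtr/PtrToInt cases above compare the
// integer width against DataLayout's pointer width (the isLegalInteger /
// getPointerTypeSizeInBits queries).  A dependency-free analogue of that
// reading, assuming a hypothetical 64-bit pointer target:
#include <cassert>

static bool intToPtrIsFree(bool LegalInt, unsigned SrcBits,
                           unsigned PtrBits = 64) {
  return LegalInt && SrcBits <= PtrBits; // no truncation needed
}

static bool ptrToIntIsFree(bool LegalInt, unsigned DstBits,
                           unsigned PtrBits = 64) {
  return LegalInt && DstBits >= PtrBits; // no truncation needed
}

int main() {
  assert(intToPtrIsFree(true, 64));   // i64 -> ptr
  assert(!intToPtrIsFree(true, 128)); // i128 -> ptr needs a trunc
  assert(ptrToIntIsFree(true, 64));   // ptr -> i64
  assert(!ptrToIntIsFree(true, 32));  // ptr -> i32 needs a trunc
}
// -------------------------------------------------------------------------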
  // ...
                                 unsigned Index) const {
  // ...
                                 unsigned Index) const {
  // ...
                                            const APInt &DemandedDstElts,
  // ...
                                         const Value *Ptr, bool VariableMask,
  // ...
                                         const Value *Ptr, bool VariableMask,
  // ...
                                 bool UseMaskForCond,
                                 bool UseMaskForGaps) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::experimental_vector_histogram_add:
      // Targets must opt in to histogram support.
      return InstructionCost::getInvalid();
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
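// --- Illustrative sketch (not part of the header) -----------------------
// The intrinsics listed above carry no machine code after lowering, so the
// default cost model reports them as free.  A hypothetical sanity check
// over a function (helper name assumesAreFree is an assumption):
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"

static bool assumesAreFree(llvm::Function &F,
                           const llvm::TargetTransformInfo &TTI) {
  using TTIRef = llvm::TargetTransformInfo;
  for (llvm::Instruction &I : llvm::instructions(F))
    if (llvm::isa<llvm::AssumeInst>(&I) &&
        TTI.getInstructionCost(&I, TTIRef::TCK_SizeAndLatency) !=
            TTIRef::TCC_Free)
      return false; // A target override made @llvm.assume non-free.
  return true;
}
// -------------------------------------------------------------------------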
  // ...
                                   const SCEV *) const {
  // ...
                                             std::optional<FastMathFlags> FMF,
  // ...
                                     Type *ExpectedType) const {
  // ...
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }
  // ...
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }
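// --- Illustrative sketch (not part of the header) -----------------------
// What the residual-type loop above produces, as a dependency-free
// analogue: for a plain memcpy the residual is covered byte-by-byte
// (OpSizeInBytes == 1); with an atomic element size of 4 the remaining
// bytes are walked in 4-byte steps.
#include <cstdio>
#include <optional>
#include <vector>

static std::vector<unsigned>
residualOpBits(unsigned RemainingBytes, std::optional<unsigned> AtomicCpySize) {
  unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
  std::vector<unsigned> Ops;
  for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
    Ops.push_back(OpSizeInBytes * 8); // stands in for Type::getIntNTy(C, N)
  return Ops;
}

int main() {
  // 3 residual bytes, non-atomic: three i8 operations.
  std::printf("%zu\n", residualOpBits(3, std::nullopt).size()); // 3
  // 8 residual bytes, atomic element size 4: two i32 operations.
  std::printf("%zu\n", residualOpBits(8, 4u).size());           // 2
}
// -------------------------------------------------------------------------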
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  // ...
                                     unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  // ...
                             unsigned AddrSpace) const {
  // ...
                              unsigned AddrSpace) const {
  // ...
                           unsigned ChainSizeInBytes,
  // ...
                            unsigned ChainSizeInBytes,
  // ...
                                   Align Alignment) const {
    // ...
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);
      auto *VT = cast<FixedVectorType>(Val->getType());

      // In general, the vector lane needs as many bits as its widest element.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // The sign bit is excluded here (hence the -1).
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // If any element is signed, the whole vector is signed.
          isSigned |= signedElement;
          // Track the widest element seen so far.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an integer constant element: fall back to the full width.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }
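// --- Illustrative sketch (not part of the header) -----------------------
// A dependency-free analogue of the element-size computation above, using
// plain 64-bit values in place of APInt.  minSignedBits() plays the role of
// APInt::getSignificantBits(): the minimum two's-complement width that
// round-trips the value; the sign bit is then dropped with -1.
#include <algorithm>
#include <cassert>
#include <cstdint>

static unsigned minSignedBits(int64_t V) {
  for (unsigned Bits = 1; Bits < 64; ++Bits) {
    // Sign-extend the low `Bits` bits of V and compare against V.
    uint64_t U = static_cast<uint64_t>(V) << (64 - Bits);
    if ((static_cast<int64_t>(U) >> (64 - Bits)) == V)
      return Bits;
  }
  return 64;
}

int main() {
  // <4 x i32> <1, 2, 3, 4>: the widest element is 4 = 0b0100, which needs
  // 4 signed bits; minus the sign bit the lane needs 3 bits.
  int64_t Elts[] = {1, 2, 3, 4};
  unsigned MinRequiredSize = 0;
  bool IsSigned = false;
  for (int64_t E : Elts) {
    IsSigned |= E < 0;
    MinRequiredSize = std::max(MinRequiredSize, minSignedBits(E) - 1);
  }
  assert(MinRequiredSize == 3 && !IsSigned);
}
// -------------------------------------------------------------------------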
    // ...
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  // ...
                              int64_t MergeDistance) const {
/// CRTP base class for use as a mix-in that aids implementing a
/// TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);
    APInt BaseOffset(PtrSizeBits, 0);
    // ...
    Type *TargetType = nullptr;
    // ...
      TargetType = GTI.getIndexedType();
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto *Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a (splat of a) constant.
        assert(ConstIdx && "Unexpected GEP index");
        // ...
      } else {
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        // ...
        Scale = ElementSize;
      }
      AccessType = TargetType;
    // ...
                               Ptr->getType()->getPointerAddressSpace()))
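// --- Illustrative sketch (not part of the header) -----------------------
// What the index walk above computes, on a concrete (hypothetical) GEP:
//   getelementptr {i32, i32}, ptr %p, i64 1, i32 1
// Constant indices fold into BaseOffset via the struct layout; only a
// variable index would set Scale.  Dependency-free arithmetic:
#include <cassert>
#include <cstdint>

int main() {
  const int64_t StructSize = 8;         // sizeof {i32, i32}
  const int64_t FieldOffset[] = {0, 4}; // cf. StructLayout::getElementOffset
  int64_t BaseOffset = 0, Scale = 0;

  BaseOffset += 1 * StructSize;         // constant sequential index (i64 1)
  BaseOffset += FieldOffset[1];         // struct field index (i32 1)

  // With BaseOffset == 12 and Scale == 0, the GEP is free whenever the
  // target's isLegalAddressingMode() accepts a reg+12 form.
  assert(BaseOffset == 12 && Scale == 0);
}
// -------------------------------------------------------------------------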
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        // Same-base pointers with all-constant indices fold into the
        // addressing mode; anything else costs an add.
        if (GEP->hasAllConstantIndices())
          continue;
        // ...
      } else {
        // Distinct bases are charged the full GEP cost.
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<T *>(this)->getGEPCost(
            GEP->getSourceElementType(), GEP->getPointerOperand(), Indices,
            AccessTy, CostKind);
      }
    }
    auto *TargetTTI = static_cast<T *>(this);
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Not lowered to a real call: basic cost.
    Type *Ty = U->getType();
    // ...
    auto *I = dyn_cast<Instruction>(U);
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free; // Modeled as free.
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the
      // GEP has a single user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = U->getOperand(0)->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(U->getOperand(0));
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If the load's single user is a trunc to a register-sized type, the
      // selector folds load+trunc into one narrower load, so cost the
      // truncated type instead (the one-use check is elided here).
      if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
        LoadType = TI->getDestTy();
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      // A select matching a logical and/or pattern is costed as the
      // corresponding And/Or arithmetic instruction.
      // ...
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      // ...
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // ...
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME
      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      ArrayRef<int> Mask = Shuffle->getShuffleMask();
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return TTI::TCC_Free;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, /*...*/);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_InsertSubvector, /*...*/);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          // The demanded destination lanes are the non-poison mask slots.
          APInt DemandedDstElts = APInt::getZero(Mask.size());
          for (auto I : enumerate(Mask))
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
        SmallVector<int, 16> AdjustMask(Mask.begin(), Mask.end());

        // Widening: remap mask elements that pick from the second source so
        // they still do once both sources are widened.
        if (Shuffle->increasesLength()) {
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
          return TargetTTI->getShuffleCost(/*...*/);
        }

        // Narrowing: cost as a full-width permute plus an extract of the
        // low subvector.
        // ... (ShuffleCost = cost of the equivalent full-width permute)
        SmallVector<int, 16> ExtractMask(Mask.size());
        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 TTI::SK_ExtractSubvector, /*...*/,
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
      }

      if (Shuffle->isIdentity())
        return TTI::TCC_Free;
      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, /*...*/);
      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, /*...*/);
      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, /*...*/);
      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, /*...*/);
      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, /*...*/);
      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_InsertSubvector, /*...*/);
      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
                                         SubIndex, nullptr, Operands, Shuffle);
      // Anything else: a generic two-source permute.
      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, /*...*/);
    }
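// --- Illustrative sketch (not part of the header) -----------------------
// The fixed-length path above tries the specific mask kinds in a fixed
// order (identity, reverse, select, transpose, broadcast, ...) and only
// falls back to a generic permute.  A dependency-free analogue of two of
// those predicates, with -1 standing in for PoisonMaskElem:
#include <cassert>
#include <vector>

static bool isReverse(const std::vector<int> &M) {
  for (unsigned i = 0, e = M.size(); i != e; ++i)
    if (M[i] != -1 && M[i] != int(e - 1 - i))
      return false;
  return true;
}

static bool isZeroEltSplat(const std::vector<int> &M) {
  for (int Elt : M)
    if (Elt != -1 && Elt != 0)
      return false;
  return true;
}

int main() {
  assert(isReverse({3, 2, 1, 0}));       // -> SK_Reverse
  assert(isZeroEltSplat({0, 0, -1, 0})); // -> SK_Broadcast
  assert(!isReverse({0, 1, 2, 3}));      // identity, matched earlier
}
// -------------------------------------------------------------------------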
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    auto *TargetTTI = static_cast<T *>(this);
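// --- Illustrative sketch (not part of the header) -----------------------
// Why `static_cast<T *>(this)`: TargetTransformInfoImplCRTPBase uses the
// curiously recurring template pattern, so a base-class costing routine can
// re-enter the most-derived target's overrides without virtual dispatch.
// A minimal standalone analogue:
#include <cassert>

template <typename T> struct CostBase {
  int instructionCost() {
    // Dispatches to the most-derived implementation, as the header does
    // with TargetTTI->getArithmeticInstrCost(...).
    return static_cast<T *>(this)->arithmeticCost();
  }
  int arithmeticCost() { return 1; } // generic default
};

struct MyTargetTTI : CostBase<MyTargetTTI> {
  int arithmeticCost() { return 2; } // target-specific override
};

int main() {
  MyTargetTTI TTI;
  assert(TTI.instructionCost() == 2); // the derived override is reached
}
// -------------------------------------------------------------------------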