#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
if (!isa<Constant>(Operand))
return SI.getNumCases();
std::pair<const Value *, unsigned>
  return std::make_pair(nullptr, -1);
assert(F && "A concrete function must be provided to this routine.");
if (F->isIntrinsic())
if (F->hasLocalLinkage() || !F->hasName())
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
    Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
    Name == "floorf" || Name == "ceil" || Name == "round" ||
std::optional<Value *>
    bool &KnownBitsComputed) const {
    SimplifyAndSetOp) const {
bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
int64_t ScalableOffset = 0) const {
  return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
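// Default guess: only "reg" and "reg + reg" addressing (no global base, no
// constant offset, scale 0 or 1) is assumed legal; targets override this.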
Align Alignment) const {
unsigned AddrSpace) const {
  Scale, AddrSpace, nullptr,
bool useAA() const { return false; }
const APInt &DemandedElts, bool Insert, bool Extract,
bool IsZeroCmp) const {
return isa<SelectInst>(I) &&
unsigned *Fast) const {
472 return "Generic::Unknown Register Class";
474 return "Generic::ScalarRC";
476 return "Generic::VectorRC";
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
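// Returning 0 here means no maximum vectorization factor is imposed by the
// default implementation.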
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  AllowPromotionWithoutCommonHeader = false;
std::optional<unsigned>
std::optional<unsigned>
unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const {
auto IsWidenableCondition = [](const Value *V) {
  if (auto *II = dyn_cast<IntrinsicInst>(V))
    if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::SDiv:
case Instruction::SRem:
case Instruction::UDiv:
case Instruction::URem:
case Instruction::And:
case Instruction::Or:
  if (any_of(Args, IsWidenableCondition))
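// Widenable conditions eventually lower to constants, so a logical and/or
// that consumes one is treated as free by this default cost model.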
case Instruction::IntToPtr: {
  unsigned SrcSize = Src->getScalarSizeInBits();
case Instruction::PtrToInt: {
  unsigned DstSize = Dst->getScalarSizeInBits();
case Instruction::BitCast:
  if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
case Instruction::Trunc: {
unsigned Index) const {
unsigned Index) const {
const APInt &DemandedDstElts,
const Value *Ptr, bool VariableMask,
const Value *Ptr, bool VariableMask,
bool UseMaskForCond, bool UseMaskForGaps) const {
switch (ICA.getID()) {
case Intrinsic::allow_runtime_check:
case Intrinsic::allow_ubsan_check:
case Intrinsic::annotation:
case Intrinsic::assume:
case Intrinsic::sideeffect:
case Intrinsic::pseudoprobe:
case Intrinsic::arithmetic_fence:
case Intrinsic::dbg_assign:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_label:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::is_constant:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::objectsize:
case Intrinsic::ptr_annotation:
case Intrinsic::var_annotation:
case Intrinsic::experimental_gc_result:
case Intrinsic::experimental_gc_relocate:
case Intrinsic::coro_alloc:
case Intrinsic::coro_begin:
case Intrinsic::coro_free:
case Intrinsic::coro_end:
case Intrinsic::coro_frame:
case Intrinsic::coro_size:
case Intrinsic::coro_align:
case Intrinsic::coro_suspend:
case Intrinsic::coro_subfn_addr:
case Intrinsic::threadlocal_address:
case Intrinsic::experimental_widenable_condition:
case Intrinsic::ssa_copy:
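// These intrinsics are assumed to be free (zero cost) by the default
// intrinsic cost handling.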
const SCEV *) const {
std::optional<FastMathFlags> FMF,
Type *ExpectedType) const {
unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicElementSize) const {
  return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
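// With an atomic element size the memcpy loop operates on an integer type of
// that many bytes; otherwise the default operand type falls back to i8.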
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicCpySize) const {
  unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
  for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
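// The residual bytes are emitted as individual operations of OpSizeInBytes
// each: plain byte copies unless an atomic element size was requested.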
return (Caller->getFnAttribute("target-cpu") ==
        Callee->getFnAttribute("target-cpu")) &&
       (Caller->getFnAttribute("target-features") ==
        Callee->getFnAttribute("target-features"));
unsigned DefaultCallPenalty) const {
  return DefaultCallPenalty;
return (Caller->getFnAttribute("target-cpu") ==
        Callee->getFnAttribute("target-cpu")) &&
       (Caller->getFnAttribute("target-features") ==
        Callee->getFnAttribute("target-features"));
unsigned AddrSpace) const {
unsigned AddrSpace) const {
unsigned ChainSizeInBytes,
unsigned ChainSizeInBytes,
Align Alignment) const {
if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
  const auto *VectorValue = cast<Constant>(Val);
  auto *VT = cast<FixedVectorType>(Val->getType());
  unsigned MaxRequiredSize =
      VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
  unsigned MinRequiredSize = 0;
  for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
    if (auto *IntElement =
            dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
      bool signedElement = IntElement->getValue().isNegative();
      unsigned ElementMinRequiredSize =
          IntElement->getValue().getSignificantBits() - 1;
      MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
  return MaxRequiredSize;
  return MinRequiredSize;
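// For a constant vector, the minimum element size is the widest signed
// significant-bit count over all elements; if any element is not a
// ConstantInt, the full source element width is required instead.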
if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
  isSigned = CI->getValue().isNegative();
  return CI->getValue().getSignificantBits() - 1;
if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
  return Cast->getSrcTy()->getScalarSizeInBits() - 1;
if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
  return Cast->getSrcTy()->getScalarSizeInBits();
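// Example: a ConstantInt of -4 has getSignificantBits() == 3 (two value bits
// plus the sign bit), so 2 is returned as the size needed besides the sign.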
return Ptr && isa<SCEVAddRecExpr>(Ptr);
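// An access is treated as strided when the pointer's SCEV is an
// add-recurrence, i.e. it advances by some step on every loop iteration.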
int64_t MergeDistance) const {
template <typename T>
assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
bool HasBaseReg = (BaseGV == nullptr);
APInt BaseOffset(PtrSizeBits, 0);
Type *TargetType = nullptr;
  TargetType = GTI.getIndexedType();
  const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
    ConstIdx = dyn_cast<ConstantInt>(Splat);
  if (StructType *STy = GTI.getStructTypeOrNull()) {
    assert(ConstIdx && "Unexpected GEP index");
    int64_t ElementSize =
        GTI.getSequentialElementStride(DL).getFixedValue();
      Scale = ElementSize;
  AccessType = TargetType;
  Ptr->getType()->getPointerAddressSpace()))
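// --- Illustrative sketch (hypothetical helper, not part of this header) ---
// The folding above can be pictured as: constant GEP indices accumulate into
// a byte offset, while the first variable index supplies the addressing-mode
// scale. A standalone approximation, assuming simple array-style indices:
//
//   struct IndexStep { bool IsConstant; int64_t Index; int64_t ElemSize; };
//   static std::pair<int64_t, int64_t>
//   foldGEPIndices(const std::vector<IndexStep> &Steps) {
//     int64_t BaseOffset = 0, Scale = 0;
//     for (const IndexStep &S : Steps) {
//       if (S.IsConstant)
//         BaseOffset += S.Index * S.ElemSize; // fold constants into offset
//       else if (Scale == 0)
//         Scale = S.ElemSize;                 // first variable index -> scale
//     }
//     return {BaseOffset, Scale};
//   }
//
// The real implementation also handles struct fields via StructLayout and
// splat vector indices, and finally asks isLegalAddressingMode whether the
// resulting (BaseGV, BaseOffset, Scale) combination is free for the target.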
for (const Value *V : Ptrs) {
  const auto *GEP = dyn_cast<GetElementPtrInst>(V);
  if (Info.isSameBase() && V != Base) {
    if (GEP->hasAllConstantIndices())
      GEP->getPointerOperand(),
auto *TargetTTI = static_cast<T *>(this);
auto *CB = dyn_cast<CallBase>(U);
if (CB && !isa<IntrinsicInst>(U)) {
  if (const Function *F = CB->getCalledFunction()) {
    if (!TargetTTI->isLoweredToCall(F))
Type *Ty = U->getType();
auto *I = dyn_cast<Instruction>(U);
case Instruction::Call: {
  assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
  auto *Intrinsic = cast<IntrinsicInst>(U);
  return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
case Instruction::Br:
case Instruction::Ret:
case Instruction::PHI:
case Instruction::Switch:
  return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
case Instruction::ExtractValue:
case Instruction::Freeze:
case Instruction::Alloca:
  if (cast<AllocaInst>(U)->isStaticAlloca())
case Instruction::GetElementPtr: {
  const auto *GEP = cast<GEPOperator>(U);
  Type *AccessType = nullptr;
  if (GEP->hasOneUser() && I)
    AccessType = I->user_back()->getAccessType();
  return TargetTTI->getGEPCost(GEP->getSourceElementType(),
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FNeg: {
  if (Opcode != Instruction::FNeg)
  return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
case Instruction::IntToPtr:
case Instruction::PtrToInt:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast:
case Instruction::FPExt:
case Instruction::SExt:
case Instruction::ZExt:
case Instruction::AddrSpaceCast: {
  return TargetTTI->getCastInstrCost(
case Instruction::Store: {
  auto *SI = cast<StoreInst>(U);
  return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                    SI->getPointerAddressSpace(), CostKind,
case Instruction::Load: {
  auto *LI = cast<LoadInst>(U);
  Type *LoadType = U->getType();
  if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
    LoadType = TI->getDestTy();
  return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                    {TTI::OK_AnyValue, TTI::OP_None}, I);
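// A load whose only use is a trunc is costed as a load of the narrower
// destination type, on the assumption that the trunc folds into the load.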
case Instruction::Select: {
  const Value *Op0, *Op1;
  return TargetTTI->getArithmeticInstrCost(
  return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
case Instruction::ICmp:
case Instruction::FCmp: {
  return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                       I ? cast<CmpInst>(I)->getPredicate()
case Instruction::InsertElement: {
  auto *IE = dyn_cast<InsertElementInst>(U);
  if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
    if (CI->getValue().getActiveBits() <= 32)
      Idx = CI->getZExtValue();
  return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
case Instruction::ShuffleVector: {
  auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
  auto *VecTy = cast<VectorType>(U->getType());
  int NumSubElts, SubIndex;
  if (Shuffle->changesLength()) {
    if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
    if (Shuffle->isExtractSubvectorMask(SubIndex))
    if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
      return TargetTTI->getShuffleCost(
    int ReplicationFactor, VF;
    if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
        DemandedDstElts.setBit(I.index());
      return TargetTTI->getReplicationShuffleCost(
          VecSrcTy->getElementType(), ReplicationFactor, VF,
    bool IsUnary = isa<UndefValue>(Operands[1]);
    NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
    if (Shuffle->increasesLength()) {
      for (int &M : AdjustMask)
        M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
      return TargetTTI->getShuffleCost(
    std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
    return ShuffleCost + TargetTTI->getShuffleCost(
        ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
  if (Shuffle->isIdentity())
  if (Shuffle->isReverse())
  if (Shuffle->isSelect())
  if (Shuffle->isTranspose())
  if (Shuffle->isZeroEltSplat())
  if (Shuffle->isSingleSource())
  if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
    return TargetTTI->getShuffleCost(
  if (Shuffle->isSplice(SubIndex))
        SubIndex, nullptr, Operands, Shuffle);
case Instruction::ExtractElement: {
  auto *EEI = dyn_cast<ExtractElementInst>(U);
  if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
    if (CI->getValue().getActiveBits() <= 32)
      Idx = CI->getZExtValue();
  return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
auto *TargetTTI = static_cast<T *>(this);