14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
55 if (!isa<Constant>(Operand))
68 return SI.getNumCases();
131 std::pair<const Value *, unsigned>
133 return std::make_pair(
nullptr, -1);
142 assert(
F &&
"A concrete function must be provided to this routine.");
149 if (
F->isIntrinsic())
152 if (
F->hasLocalLinkage() || !
F->hasName())
158 if (
Name ==
"copysign" ||
Name ==
"copysignf" ||
Name ==
"copysignl" ||
168 Name ==
"exp2l" ||
Name ==
"exp2f" ||
Name ==
"floor" ||
169 Name ==
"floorf" ||
Name ==
"ceil" ||
Name ==
"round" ||
195 std::optional<Value *>
198 bool &KnownBitsComputed)
const {
206 SimplifyAndSetOp)
const {
224 bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
226 int64_t ScalableOffset = 0)
const {
229 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
297 Align Alignment)
const {
329 int64_t BaseOffset,
bool HasBaseReg,
331 unsigned AddrSpace)
const {
345 bool useAA()
const {
return false; }
360 const APInt &DemandedElts,
361 bool Insert,
bool Extract,
382 bool IsZeroCmp)
const {
392 return isa<SelectInst>(
I) &&
405 unsigned *
Fast)
const {
460 return "Generic::Unknown Register Class";
462 return "Generic::ScalarRC";
464 return "Generic::VectorRC";
487 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const {
return 0; }
491 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
492 AllowPromotionWithoutCommonHeader =
false;
497 std::optional<unsigned>
508 std::optional<unsigned>
524 unsigned NumStridedMemAccesses,
525 unsigned NumPrefetches,
bool HasCall)
const {
541 auto IsWidenableCondition = [](
const Value *V) {
542 if (
auto *II = dyn_cast<IntrinsicInst>(V))
543 if (II->
getIntrinsicID() == Intrinsic::experimental_widenable_condition)
552 case Instruction::FDiv:
553 case Instruction::FRem:
554 case Instruction::SDiv:
555 case Instruction::SRem:
556 case Instruction::UDiv:
557 case Instruction::URem:
560 case Instruction::And:
561 case Instruction::Or:
562 if (
any_of(Args, IsWidenableCondition))
596 case Instruction::IntToPtr: {
597 unsigned SrcSize = Src->getScalarSizeInBits();
603 case Instruction::PtrToInt: {
604 unsigned DstSize = Dst->getScalarSizeInBits();
610 case Instruction::BitCast:
611 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
615 case Instruction::Trunc: {
629 unsigned Index)
const {
658 unsigned Index)
const {
663 const APInt &DemandedDstElts,
690 const Value *
Ptr,
bool VariableMask,
698 const Value *
Ptr,
bool VariableMask,
708 bool UseMaskForCond,
bool UseMaskForGaps)
const {
714 switch (ICA.
getID()) {
717 case Intrinsic::annotation:
718 case Intrinsic::assume:
719 case Intrinsic::sideeffect:
720 case Intrinsic::pseudoprobe:
721 case Intrinsic::arithmetic_fence:
722 case Intrinsic::dbg_assign:
723 case Intrinsic::dbg_declare:
724 case Intrinsic::dbg_value:
725 case Intrinsic::dbg_label:
726 case Intrinsic::invariant_start:
727 case Intrinsic::invariant_end:
728 case Intrinsic::launder_invariant_group:
729 case Intrinsic::strip_invariant_group:
730 case Intrinsic::is_constant:
731 case Intrinsic::lifetime_start:
732 case Intrinsic::lifetime_end:
733 case Intrinsic::experimental_noalias_scope_decl:
734 case Intrinsic::objectsize:
735 case Intrinsic::ptr_annotation:
736 case Intrinsic::var_annotation:
737 case Intrinsic::experimental_gc_result:
738 case Intrinsic::experimental_gc_relocate:
739 case Intrinsic::coro_alloc:
740 case Intrinsic::coro_begin:
741 case Intrinsic::coro_free:
742 case Intrinsic::coro_end:
743 case Intrinsic::coro_frame:
744 case Intrinsic::coro_size:
745 case Intrinsic::coro_align:
746 case Intrinsic::coro_suspend:
747 case Intrinsic::coro_subfn_addr:
748 case Intrinsic::threadlocal_address:
749 case Intrinsic::experimental_widenable_condition:
750 case Intrinsic::ssa_copy:
767 const SCEV *)
const {
772 std::optional<FastMathFlags> FMF,
814 Type *ExpectedType)
const {
820 unsigned SrcAddrSpace,
unsigned DestAddrSpace,
821 unsigned SrcAlign,
unsigned DestAlign,
822 std::optional<uint32_t> AtomicElementSize)
const {
829 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
830 unsigned SrcAlign,
unsigned DestAlign,
831 std::optional<uint32_t> AtomicCpySize)
const {
832 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
834 for (
unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
840 return (Caller->getFnAttribute(
"target-cpu") ==
841 Callee->getFnAttribute(
"target-cpu")) &&
842 (Caller->getFnAttribute(
"target-features") ==
843 Callee->getFnAttribute(
"target-features"));
847 unsigned DefaultCallPenalty)
const {
848 return DefaultCallPenalty;
853 return (Caller->getFnAttribute(
"target-cpu") ==
854 Callee->getFnAttribute(
"target-cpu")) &&
855 (Caller->getFnAttribute(
"target-features") ==
856 Callee->getFnAttribute(
"target-features"));
876 unsigned AddrSpace)
const {
881 unsigned AddrSpace)
const {
893 unsigned ChainSizeInBytes,
899 unsigned ChainSizeInBytes,
929 Align Alignment)
const {
948 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
949 const auto *VectorValue = cast<Constant>(Val);
953 auto *VT = cast<FixedVectorType>(Val->
getType());
959 unsigned MaxRequiredSize =
960 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
962 unsigned MinRequiredSize = 0;
963 for (
unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
964 if (
auto *IntElement =
965 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
966 bool signedElement = IntElement->getValue().isNegative();
968 unsigned ElementMinRequiredSize =
969 IntElement->getValue().getSignificantBits() - 1;
973 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
976 return MaxRequiredSize;
979 return MinRequiredSize;
982 if (
const auto *CI = dyn_cast<ConstantInt>(Val)) {
983 isSigned = CI->getValue().isNegative();
984 return CI->getValue().getSignificantBits() - 1;
987 if (
const auto *Cast = dyn_cast<SExtInst>(Val)) {
989 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
992 if (
const auto *Cast = dyn_cast<ZExtInst>(Val)) {
994 return Cast->getSrcTy()->getScalarSizeInBits();
1002 return Ptr && isa<SCEVAddRecExpr>(
Ptr);
1014 int64_t MergeDistance)
const {
1028template <
typename T>
1042 assert(PointeeType &&
Ptr &&
"can't get GEPCost of nullptr");
1043 auto *BaseGV = dyn_cast<GlobalValue>(
Ptr->stripPointerCasts());
1044 bool HasBaseReg = (BaseGV ==
nullptr);
1047 APInt BaseOffset(PtrSizeBits, 0);
1051 Type *TargetType =
nullptr;
1059 TargetType = GTI.getIndexedType();
1062 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*
I);
1065 ConstIdx = dyn_cast<ConstantInt>(
Splat);
1066 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1068 assert(ConstIdx &&
"Unexpected GEP index");
1076 int64_t ElementSize =
1077 GTI.getSequentialElementStride(
DL).getFixedValue();
1086 Scale = ElementSize;
1101 AccessType = TargetType;
1108 Ptr->getType()->getPointerAddressSpace()))
1133 for (
const Value *V : Ptrs) {
1134 const auto *
GEP = dyn_cast<GetElementPtrInst>(V);
1137 if (
Info.isSameBase() && V !=
Base) {
1138 if (
GEP->hasAllConstantIndices())
1147 GEP->getPointerOperand(),
1159 auto *TargetTTI =
static_cast<T *
>(
this);
1162 auto *CB = dyn_cast<CallBase>(U);
1163 if (CB && !isa<IntrinsicInst>(U)) {
1164 if (
const Function *
F = CB->getCalledFunction()) {
1165 if (!TargetTTI->isLoweredToCall(
F))
1174 Type *Ty = U->getType();
1176 auto *
I = dyn_cast<Instruction>(U);
1180 case Instruction::Call: {
1181 assert(isa<IntrinsicInst>(U) &&
"Unexpected non-intrinsic call");
1182 auto *Intrinsic = cast<IntrinsicInst>(U);
1184 return TargetTTI->getIntrinsicInstrCost(CostAttrs,
CostKind);
1186 case Instruction::Br:
1187 case Instruction::Ret:
1188 case Instruction::PHI:
1189 case Instruction::Switch:
1190 return TargetTTI->getCFInstrCost(Opcode,
CostKind,
I);
1191 case Instruction::ExtractValue:
1192 case Instruction::Freeze:
1194 case Instruction::Alloca:
1195 if (cast<AllocaInst>(U)->isStaticAlloca())
1198 case Instruction::GetElementPtr: {
1199 const auto *
GEP = cast<GEPOperator>(U);
1200 Type *AccessType =
nullptr;
1203 if (
GEP->hasOneUser() &&
I)
1204 AccessType =
I->user_back()->getAccessType();
1206 return TargetTTI->getGEPCost(
GEP->getSourceElementType(),
1210 case Instruction::Add:
1211 case Instruction::FAdd:
1212 case Instruction::Sub:
1213 case Instruction::FSub:
1214 case Instruction::Mul:
1215 case Instruction::FMul:
1216 case Instruction::UDiv:
1217 case Instruction::SDiv:
1218 case Instruction::FDiv:
1219 case Instruction::URem:
1220 case Instruction::SRem:
1221 case Instruction::FRem:
1222 case Instruction::Shl:
1223 case Instruction::LShr:
1224 case Instruction::AShr:
1225 case Instruction::And:
1226 case Instruction::Or:
1227 case Instruction::Xor:
1228 case Instruction::FNeg: {
1231 if (Opcode != Instruction::FNeg)
1233 return TargetTTI->getArithmeticInstrCost(Opcode, Ty,
CostKind, Op1Info,
1236 case Instruction::IntToPtr:
1237 case Instruction::PtrToInt:
1238 case Instruction::SIToFP:
1239 case Instruction::UIToFP:
1240 case Instruction::FPToUI:
1241 case Instruction::FPToSI:
1242 case Instruction::Trunc:
1243 case Instruction::FPTrunc:
1244 case Instruction::BitCast:
1245 case Instruction::FPExt:
1246 case Instruction::SExt:
1247 case Instruction::ZExt:
1248 case Instruction::AddrSpaceCast: {
1250 return TargetTTI->getCastInstrCost(
1253 case Instruction::Store: {
1254 auto *SI = cast<StoreInst>(U);
1257 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1258 SI->getPointerAddressSpace(),
CostKind,
1261 case Instruction::Load: {
1265 auto *LI = cast<LoadInst>(U);
1266 Type *LoadType = U->getType();
1276 if (
const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1277 LoadType = TI->getDestTy();
1279 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1281 {TTI::OK_AnyValue, TTI::OP_None},
I);
1283 case Instruction::Select: {
1284 const Value *Op0, *Op1;
1295 return TargetTTI->getArithmeticInstrCost(
1300 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1304 case Instruction::ICmp:
1305 case Instruction::FCmp: {
1308 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1309 I ? cast<CmpInst>(
I)->getPredicate()
1313 case Instruction::InsertElement: {
1314 auto *IE = dyn_cast<InsertElementInst>(U);
1318 if (
auto *CI = dyn_cast<ConstantInt>(
Operands[2]))
1319 if (CI->getValue().getActiveBits() <= 32)
1320 Idx = CI->getZExtValue();
1321 return TargetTTI->getVectorInstrCost(*IE, Ty,
CostKind,
Idx);
1323 case Instruction::ShuffleVector: {
1324 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1328 auto *VecTy = cast<VectorType>(U->getType());
1331 int NumSubElts, SubIndex;
1334 if (Shuffle->changesLength()) {
1336 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1339 if (Shuffle->isExtractSubvectorMask(SubIndex))
1344 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1345 return TargetTTI->getShuffleCost(
1350 int ReplicationFactor, VF;
1351 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1355 DemandedDstElts.
setBit(
I.index());
1357 return TargetTTI->getReplicationShuffleCost(
1358 VecSrcTy->getElementType(), ReplicationFactor, VF,
1362 bool IsUnary = isa<UndefValue>(
Operands[1]);
1363 NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
1369 if (Shuffle->increasesLength()) {
1370 for (
int &M : AdjustMask)
1371 M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
1373 return TargetTTI->getShuffleCost(
1384 VecSrcTy, AdjustMask,
CostKind, 0,
nullptr);
1387 std::iota(ExtractMask.
begin(), ExtractMask.
end(), 0);
1389 VecSrcTy, ExtractMask,
1393 if (Shuffle->isIdentity())
1396 if (Shuffle->isReverse())
1400 if (Shuffle->isSelect())
1404 if (Shuffle->isTranspose())
1408 if (Shuffle->isZeroEltSplat())
1412 if (Shuffle->isSingleSource())
1416 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1417 return TargetTTI->getShuffleCost(
1421 if (Shuffle->isSplice(SubIndex))
1428 case Instruction::ExtractElement: {
1429 auto *EEI = dyn_cast<ExtractElementInst>(U);
1433 if (
auto *CI = dyn_cast<ConstantInt>(
Operands[1]))
1434 if (CI->getValue().getActiveBits() <= 32)
1435 Idx = CI->getZExtValue();
1437 return TargetTTI->getVectorInstrCost(*EEI, DstTy,
CostKind,
Idx);
1447 auto *TargetTTI =
static_cast<T *
>(
this);
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool isSigned(unsigned int Opcode)
mir Rename Register Operands
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static SymbolRef::Type getType(const Symbol *Sym)
Class for arbitrary precision integers.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
unsigned getBitWidth() const
Return the number of bits in the APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
int64_t getSExtValue() const
Get sign extended value.
an instruction to allocate memory on the stack
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Convenience struct for specifying and reasoning about fast-math flags.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Represents a single loop in the control flow graph.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
TypeSize getElementOffset(unsigned Idx) const
Class to represent struct types.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
bool match(Val *V, const Pattern &P)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr int PoisonMaskElem
constexpr unsigned BitWidth
gep_type_iterator gep_type_begin(const User *GEP)
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.