#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
    if (!isa<Constant>(Operand))

    return SI.getNumCases();
  std::pair<const Value *, unsigned>
    return std::make_pair(nullptr, -1);
    assert(F && "A concrete function must be provided to this routine.");

    if (F->isIntrinsic())

    if (F->hasLocalLinkage() || !F->hasName())

    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
  std::optional<Value *>
                                   bool &KnownBitsComputed) const {
          SimplifyAndSetOp) const {
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             int64_t ScalableOffset = 0) const {
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
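    // Illustrative sketch, not part of the header: under this default only a
    // bare base register or a base-plus-unscaled-index form counts as legal;
    // any global base, non-zero constant offset, or scale other than 0/1 is
    // rejected. A hypothetical caller holding a TargetTransformInfo `TTI`, an
    // i8 type `Int8Ty`, and an address space `AS` would therefore see roughly:
    //
    //   TTI.isLegalAddressingMode(Int8Ty, /*BaseGV=*/nullptr, /*BaseOffset=*/0,
    //                             /*HasBaseReg=*/true, /*Scale=*/1, AS); // true
    //   TTI.isLegalAddressingMode(Int8Ty, /*BaseGV=*/nullptr, /*BaseOffset=*/8,
    //                             /*HasBaseReg=*/true, /*Scale=*/0, AS); // false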
                            Align Alignment) const {

                                 unsigned AddrSpace) const {
                              Scale, AddrSpace, nullptr,

  bool useAA() const { return false; }
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,

                                              bool IsZeroCmp) const {

    return isa<SelectInst>(I) &&

                                      unsigned *Fast) const {

      return "Generic::Unknown Register Class";
      return "Generic::ScalarRC";
      return "Generic::VectorRC";
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }

                             const Instruction &I,
                             bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
  std::optional<unsigned>
  std::optional<unsigned>

                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
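      // Illustrative only: the pattern treated as free here is a guard
      // condition combined with @llvm.experimental.widenable.condition, e.g.
      //
      //   %wc = call i1 @llvm.experimental.widenable.condition()
      //   %guard = and i1 %cond, %wc
      //
      // Such an and/or exists only to make a guard widenable and is expected
      // to fold away, so the default cost model charges nothing for it.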
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
    case Instruction::Trunc: {
                                 unsigned Index) const {
                                 unsigned Index) const {
                              const APInt &DemandedDstElts,

                              const Value *Ptr, bool VariableMask,
                              const Value *Ptr, bool VariableMask,

                              bool UseMaskForCond, bool UseMaskForGaps) const {
    switch (ICA.getID()) {
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
                                       const SCEV *) const {

                                           std::optional<FastMathFlags> FMF,

                                           Type *ExpectedType) const {

                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign,
                                  std::optional<uint32_t> AtomicElementSize) const {

      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));

                                   unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
                                    unsigned AddrSpace) const {
                                     unsigned AddrSpace) const {

                                  unsigned ChainSizeInBytes,
                                   unsigned ChainSizeInBytes,

                                   Align Alignment) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);
      auto *VT = cast<FixedVectorType>(Val->getType());

      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);

          return MaxRequiredSize;

      return MinRequiredSize;
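      // Worked example (illustrative): for a ConstantInt element of -4 the
      // underlying APInt is ...11111100; APInt::getSignificantBits() returns
      // 3 (the narrowest signed width that still holds -4), so the element
      // contributes 3 - 1 = 2 value bits and signedElement is true. For +7
      // (0111) getSignificantBits() returns 4, contributing 3 value bits with
      // signedElement false.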
    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    return Ptr && isa<SCEVAddRecExpr>(Ptr);
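    // Illustrative only: a pointer counts as a strided access when
    // ScalarEvolution models it as an add-recurrence. For a loop such as
    //
    //   for (int i = 0; i < n; ++i) sum += a[i];   // 'a' is an 'int *'
    //
    // the address of a[i] is the SCEV {a,+,4}<loop>, an SCEVAddRecExpr with a
    // constant 4-byte step, so this predicate returns true.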
                                int64_t MergeDistance) const {
template <typename T>

    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    APInt BaseOffset(PtrSizeBits, 0);

    Type *TargetType = nullptr;

      TargetType = GTI.getIndexedType();
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
        ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        assert(ConstIdx && "Unexpected GEP index");
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
          Scale = ElementSize;

      AccessType = TargetType;

            Ptr->getType()->getPointerAddressSpace()))
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
                                    GEP->getPointerOperand(),

    auto *TargetTTI = static_cast<T *>(this);
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))

    Type *Ty = U->getType();
    auto *I = dyn_cast<Instruction>(U);
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      if (Opcode != Instruction::FNeg)
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      return TargetTTI->getCastInstrCost(
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
        LoadType = TI->getDestTy();
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    case Instruction::Select: {
      const Value *Op0, *Op1;
        return TargetTTI->getArithmeticInstrCost(
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
    case Instruction::ICmp:
    case Instruction::FCmp: {
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      auto *VecTy = cast<VectorType>(U->getType());

      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())

        if (Shuffle->isExtractSubvectorMask(SubIndex))

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
            DemandedDstElts.setBit(I.index());
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,

        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();

        if (Shuffle->increasesLength()) {
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;

          return TargetTTI->getShuffleCost(

        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);

      if (Shuffle->isIdentity())

      if (Shuffle->isReverse())

      if (Shuffle->isSelect())

      if (Shuffle->isTranspose())

      if (Shuffle->isZeroEltSplat())

      if (Shuffle->isSingleSource())

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(

      if (Shuffle->isSplice(SubIndex))
                                         SubIndex, nullptr, Operands, Shuffle);
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);

    auto *TargetTTI = static_cast<T *>(this);
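    // The static_cast<T *>(this) above is the CRTP dispatch used throughout
    // TargetTransformInfoImplCRTPBase: calls made through TargetTTI resolve
    // at compile time to the most-derived target implementation, and fall
    // back to the generic defaults in this file when a target does not
    // override a hook. A minimal sketch of the idiom, with illustrative names
    // rather than the real LLVM classes:
    //
    //   template <typename T> struct ImplCRTPBase {
    //     unsigned getCost() { return static_cast<T *>(this)->hookCost(); }
    //     unsigned hookCost() { return 1; } // generic default
    //   };
    //   struct MyTargetTTIImpl : ImplCRTPBase<MyTargetTTIImpl> {
    //     unsigned hookCost() { return 0; } // target override, no vtable
    //   };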