#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

T *thisT() { return static_cast<T *>(this); }

Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,

       "Can only extract subvectors from vectors");
assert((!isa<FixedVectorType>(VTy) ||
        (Index + NumSubElts) <=
       "SK_ExtractSubvector index out of range");
for (int i = 0; i != NumSubElts; ++i) {
      thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
  Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy,

       "Can only insert subvectors into vectors");
assert((!isa<FixedVectorType>(VTy) ||
        (Index + NumSubElts) <=
       "SK_InsertSubvector index out of range");
for (int i = 0; i != NumSubElts; ++i) {
  Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
          thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, CostKind,
                                      i + Index, nullptr, nullptr);

return static_cast<const T *>(this)->getST();
return static_cast<const T *>(this)->getTLI();

bool IsGatherScatter,
if (isa<ScalableVectorType>(DataTy))
auto *VT = cast<FixedVectorType>(DataTy);
unsigned VF = VT->getNumElements();
VF * thisT()->getMemoryOpCost(Opcode, VT->getElementType(), Alignment,
Opcode == Instruction::Store, CostKind);
VF * (thisT()->getCFInstrCost(Instruction::Br, CostKind) +
      thisT()->getCFInstrCost(Instruction::PHI, CostKind));
return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost;
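
The scalarized masked load/store estimate above is the sum of four parts: extracting a per-lane address (gather/scatter only), VF scalar memory operations, inserting/extracting every lane to repack the vector, and a branch plus PHI per lane when the access is predicated. A standalone sketch of that accounting with made-up unit costs (the real numbers come from the target's hooks, and the helper name is invented for illustration):

#include <cstdio>

// Illustrative only: unit costs stand in for the per-target TTI hook results.
static int maskedMemOpCost(int VF, bool IsGatherScatter, bool IsConditional) {
  int AddrExtractCost = IsGatherScatter ? VF * 1 : 0; // extract one address per lane
  int MemoryOpCost = VF * 1;                          // one scalar load/store per lane
  int PackingCost = VF * 1;                           // insert or extract each lane
  int ConditionalCost = IsConditional ? VF * 2 : 0;   // branch + PHI per lane
  return AddrExtractCost + MemoryOpCost + PackingCost + ConditionalCost;
}

int main() {
  printf("%d\n", maskedMemOpCost(8, /*IsGatherScatter=*/true, /*IsConditional=*/true)); // 40
}
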
bool IsCompared = false;
  return P.index() != Mask.size() - 1 || IsCompared;
if (static_cast<unsigned>(P.value()) >= NumSrcElts * 2)
SplatIdx = P.value();
return P.index() != Mask.size() - 1;
return SplatIdx == P.value();

unsigned *Fast) const {

TM.getSubtargetImpl(*Caller)->getFeatureBits();
TM.getSubtargetImpl(*Callee)->getFeatureBits();
return (CallerBits & CalleeBits) == CalleeBits;
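
The inline-compatibility test is a subset check on subtarget feature bits: the callee can be inlined only if every feature it was built with is also enabled in the caller. The same predicate with plain bitmasks (the real code uses FeatureBitset; the helper name is made up):

#include <cassert>
#include <cstdint>

// Callee is compatible when its feature bits are a subset of the caller's.
static bool featuresAreSubset(uint64_t CallerBits, uint64_t CalleeBits) {
  return (CallerBits & CalleeBits) == CalleeBits;
}

int main() {
  assert(featuresAreSubset(0b1110, 0b0110));  // caller has everything the callee needs
  assert(!featuresAreSubset(0b1010, 0b0110)); // callee relies on a feature the caller lacks
}
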
std::pair<const Value *, unsigned>

bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
int64_t ScalableOffset = 0) {

Type *ScalarValTy) const {
auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
  if (getTLI()->isOperationLegal(ISD::STORE, VT) ||
while (VF > 2 && IsSupportedByTarget(VF))

int64_t Scale, unsigned AddrSpace) {

unsigned &JumpTableSize,
unsigned N = SI.getNumCases();
bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
APInt MinCaseVal = MaxCaseVal;
for (auto CI : SI.cases()) {
  const APInt &CaseVal = CI.getCaseValue()->getValue();
  if (CaseVal.sgt(MaxCaseVal))
    MaxCaseVal = CaseVal;
  if (CaseVal.slt(MinCaseVal))
    MinCaseVal = CaseVal;
for (auto I : SI.cases())
  Dests.insert(I.getCaseSuccessor());
if (N < 2 || N < TLI->getMinimumJumpTableEntries())
(MaxCaseVal - MinCaseVal)
    .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
JumpTableSize = Range;
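
The jump table size computed above is the inclusive case-value range, MaxCaseVal - MinCaseVal + 1, with the difference clamped so the +1 cannot overflow. A worked version with ordinary integers (the real code operates on APInt):

#include <cstdint>
#include <cstdio>

static uint64_t jumpTableSize(uint64_t MinCaseVal, uint64_t MaxCaseVal) {
  uint64_t Range = MaxCaseVal - MinCaseVal;
  if (Range > UINT64_MAX - 1) // clamp so the +1 below cannot wrap
    Range = UINT64_MAX - 1;
  return Range + 1;
}

int main() {
  // Cases {3, 5, 10}: the table spans 10 - 3 + 1 = 8 slots, most of them holes.
  printf("%llu\n", (unsigned long long)jumpTableSize(3, 10));
}
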
if (!TM.isPositionIndependent())
const Triple &TargetTriple = TM.getTargetTriple();

case Instruction::SDiv:
case Instruction::SRem:
case Instruction::UDiv:
case Instruction::URem: {

else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
<< "advising against unrolling the loop because it "

std::optional<Value *>
bool &KnownBitsComputed) {
IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,

virtual std::optional<unsigned>
return std::optional<unsigned>(
virtual std::optional<unsigned>
std::optional<unsigned> TargetResult =

unsigned NumStridedMemAccesses,
unsigned NumPrefetches,
bool HasCall) const {
NumPrefetches, HasCall);

const APInt &DemandedElts,
bool Insert, bool Extract,
if (isa<ScalableVectorType>(InTy))
auto *Ty = cast<FixedVectorType>(InTy);
(VL.empty() || VL.size() == Ty->getNumElements()) &&
"Vector size mismatch");
for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
  if (!DemandedElts[i])
  Value *InsertedVal = VL.empty() ? nullptr : VL[i];
  Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty,
  Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,

unsigned ScalarOpdIdx) const {
if (isa<ScalableVectorType>(InTy))
auto *Ty = cast<FixedVectorType>(InTy);
return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,

assert(Args.size() == Tys.size() && "Expected matching Args and Tys");
for (int I = 0, E = Args.size(); I != E; I++) {
  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
    if (auto *VecTy = dyn_cast<VectorType>(Ty))

if (MTy == LK.second)

ArrayRef<const Value *> Args = {}, const Instruction *CxtI = nullptr) {
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
InstructionCost OpCost = (IsFloat ? 2 : 1);
if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
  return LT.first * OpCost;
if (!TLI->isOperationExpand(ISD, LT.second)) {
  return LT.first * 2 * OpCost;
unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
InstructionCost DivCost = thisT()->getArithmeticInstrCost(
    DivOpc, Ty, CostKind, Opd1Info, Opd2Info);
InstructionCost MulCost =
    thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
InstructionCost SubCost =
    thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
return DivCost + MulCost + SubCost;
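
When an integer remainder has to be expanded, it is priced as a divide plus a multiply plus a subtract, i.e. a % b is rewritten as a - (a / b) * b. The rewrite those three costs correspond to, in scalar form:

#include <cassert>

// a % b expanded as a - (a / b) * b: one div, one mul, one sub.
static int expandedRem(int a, int b) {
  int q = a / b; // DivCost
  int p = q * b; // MulCost
  return a - p;  // SubCost
}

int main() { assert(expandedRem(17, 5) == 17 % 5); }
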
if (isa<ScalableVectorType>(Ty))
if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
  InstructionCost Cost = thisT()->getArithmeticInstrCost(
SmallVector<Type *> Tys(Args.size(), Ty);

if (isSplatMask(Mask, NumSrcElts, Index))
(Index + Mask.size()) <= (size_t)NumSrcElts) {
Mask, NumSrcElts, NumSubElts, Index)) {
if (Index + NumSubElts > NumSrcElts)
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
  return getBroadcastShuffleOverhead(FVT, CostKind);
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
  return getPermuteShuffleOverhead(FVT, CostKind);
cast<FixedVectorType>(SubTp));
cast<FixedVectorType>(SubTp));

assert(ISD && "Invalid opcode");
TypeSize SrcSize = SrcLT.second.getSizeInBits();
TypeSize DstSize = DstLT.second.getSizeInBits();
bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();
case Instruction::Trunc:
case Instruction::BitCast:
  if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
case Instruction::FPExt:
  if (I && getTLI()->isExtFree(I))
case Instruction::ZExt:
  if (TLI->isZExtFree(SrcLT.second, DstLT.second))
case Instruction::SExt:
  if (I && getTLI()->isExtFree(I))
  if (DstLT.first == SrcLT.first &&
case Instruction::AddrSpaceCast:
  Dst->getPointerAddressSpace()))
auto *SrcVTy = dyn_cast<VectorType>(Src);
auto *DstVTy = dyn_cast<VectorType>(Dst);
if (SrcLT.first == DstLT.first &&
if (!SrcVTy && !DstVTy) {
if (DstVTy && SrcVTy) {
  if (SrcLT.first == DstLT.first && SrcSize == DstSize) {
    if (Opcode == Instruction::ZExt)
    if (Opcode == Instruction::SExt)
      return SrcLT.first * 2;
    return SrcLT.first * 1;
if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
    DstVTy->getElementCount().isVector()) {
  T *TTI = static_cast<T *>(this);
  (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
if (isa<ScalableVectorType>(DstVTy))
unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

if (Opcode == Instruction::BitCast) {
return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,

const Instruction *I = nullptr) {
const TargetLoweringBase *TLI = getTLI();
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
Op1Info, Op2Info, I);
assert(CondTy && "CondTy must exist");
!TLI->isOperationExpand(ISD, LT.second)) {
  return LT.first * 1;
if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
  if (isa<ScalableVectorType>(ValTy))
  unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
  InstructionCost Cost =
      thisT()->getCmpSelInstrCost(Opcode, ValVTy->getScalarType(), CondTy,
                                  VecPred, CostKind, Op1Info, Op2Info, I);

ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) {
return thisT()->getVectorInstrCost(Opcode, Val, CostKind, Index, nullptr,
Value *Op0 = nullptr;
Value *Op1 = nullptr;
if (auto *IE = dyn_cast<InsertElementInst>(&I)) {
  Op0 = IE->getOperand(0);
  Op1 = IE->getOperand(1);
return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index, Op0,

const APInt &DemandedDstElts,
"Unexpected size of DemandedDstElts.");
Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
Cost += thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,

assert(!Src->isVoidTy() && "Invalid type");
LT.second.getSizeInBits())) {
if (Opcode == Instruction::Store)
cast<VectorType>(Src), Opcode != Instruction::Store,
Opcode == Instruction::Store, CostKind);

return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,
const Value *Ptr, bool VariableMask,
return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
const Value *Ptr, bool VariableMask,
return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,

bool UseMaskForCond = false, bool UseMaskForGaps = false) {
if (isa<ScalableVectorType>(VecTy))
auto *VT = cast<FixedVectorType>(VecTy);
unsigned NumElts = VT->getNumElements();
assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
unsigned NumSubElts = NumElts / Factor;
if (UseMaskForCond || UseMaskForGaps)
  Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);
unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);
BitVector UsedInsts(NumLegalInsts, false);
for (unsigned Index : Indices)
  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
    UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
"Interleaved memory op has too many members");
for (unsigned Index : Indices) {
  assert(Index < Factor && "Invalid index for interleaved memory op");
  for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
    DemandedLoadStoreElts.setBit(Index + Elm * Factor);
if (Opcode == Instruction::Load) {
SubVT, DemandedAllSubElts,
Cost += Indices.size() * InsSubCost;
Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
SubVT, DemandedAllSubElts,
Cost += ExtSubCost * Indices.size();
Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
if (!UseMaskForCond)
Cost += thisT()->getReplicationShuffleCost(
    I8Type, Factor, NumSubElts,
    UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
if (UseMaskForGaps) {
  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
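
In the interleaved-access costing above, member Index of a factor-F group touches the wide-vector elements Index, Index + F, Index + 2F, and so on; DemandedLoadStoreElts records exactly those positions. The same bookkeeping as a small standalone function (names invented for illustration):

#include <cstdio>
#include <vector>

static std::vector<bool> demandedElts(unsigned Factor, unsigned NumSubElts,
                                      const std::vector<unsigned> &Indices) {
  std::vector<bool> Demanded(Factor * NumSubElts, false);
  for (unsigned Index : Indices)
    for (unsigned Elm = 0; Elm < NumSubElts; ++Elm)
      Demanded[Index + Elm * Factor] = true; // element touched by this member
  return Demanded;
}

int main() {
  // Members 0 and 2 of a stride-3 group over 4 sub-elements.
  for (bool B : demandedElts(3, 4, {0, 2}))
    printf("%d", (int)B); // prints 101101101101
  printf("\n");
}
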
std::optional<unsigned> FOp =
if (ICA.getID() == Intrinsic::vp_load) {
  if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
    Alignment = VPI->getPointerAlignment().valueOrOne();
  if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[0]))
    AS = PtrTy->getAddressSpace();
  return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
if (ICA.getID() == Intrinsic::vp_store) {
  if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
    Alignment = VPI->getPointerAlignment().valueOrOne();
  if (auto *PtrTy = dyn_cast<PointerType>(ICA.getArgTypes()[1]))
    AS = PtrTy->getAddressSpace();
  return thisT()->getMemoryOpCost(*FOp, ICA.getArgTypes()[0], Alignment,
return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
return thisT()->getCastInstrCost(
auto *UI = cast<VPCmpIntrinsic>(ICA.getInst());
return thisT()->getCmpSelInstrCost(*FOp, ICA.getArgTypes()[0],
std::optional<Intrinsic::ID> FID =
"Expected VPIntrinsic to have Mask and Vector Length args and "
*FID != Intrinsic::vector_reduce_fadd &&
*FID != Intrinsic::vector_reduce_fmul)
return thisT()->getIntrinsicInstrCost(NewICA, CostKind);

(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()

case Intrinsic::powi:
  if (auto *RHSC = dyn_cast<ConstantInt>(Args[1])) {
    bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize();
    ShouldOptForSize)) {
    unsigned ActiveBits = Exponent.getActiveBits();
    unsigned PopCount = Exponent.popcount();
    thisT()->getArithmeticInstrCost(
    if (RHSC->isNegative())
      Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
case Intrinsic::cttz:
case Intrinsic::ctlz:
case Intrinsic::memcpy:
  return thisT()->getMemcpyCost(ICA.getInst());
case Intrinsic::masked_scatter: {
  const Value *Mask = Args[3];
  bool VarMask = !isa<Constant>(Mask);
  Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
  return thisT()->getGatherScatterOpCost(Instruction::Store,
case Intrinsic::masked_gather: {
  const Value *Mask = Args[2];
  bool VarMask = !isa<Constant>(Mask);
  Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
  return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
case Intrinsic::experimental_vp_strided_store: {
  const Value *Mask = Args[3];
  const Value *EVL = Args[4];
  bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
  Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
  return thisT()->getStridedMemoryOpCost(Instruction::Store,
                                         Data->getType(), Ptr, VarMask,
case Intrinsic::experimental_vp_strided_load: {
  const Value *Mask = Args[2];
  const Value *EVL = Args[3];
  bool VarMask = !isa<Constant>(Mask) || !isa<Constant>(EVL);
  Type *EltTy = cast<VectorType>(RetTy)->getElementType();
  return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
case Intrinsic::stepvector: {
  if (isa<ScalableVectorType>(RetTy))
case Intrinsic::vector_extract: {
  if (isa<ScalableVectorType>(RetTy))
  unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
  cast<VectorType>(Args[0]->getType()), {},
case Intrinsic::vector_insert: {
  if (isa<ScalableVectorType>(Args[1]->getType()))
  unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
  return thisT()->getShuffleCost(
case Intrinsic::vector_reverse: {
  cast<VectorType>(Args[0]->getType()), {},
case Intrinsic::vector_splice: {
  unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
  cast<VectorType>(Args[0]->getType()), {},
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fmaximum:
case Intrinsic::vector_reduce_fminimum:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin: {
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul: {
  IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
case Intrinsic::fshl:
case Intrinsic::fshr: {
  const Value *X = Args[0];
  const Value *Y = Args[1];
  const Value *Z = Args[2];
  thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
  thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
  Cost += thisT()->getArithmeticInstrCost(
  Cost += thisT()->getArithmeticInstrCost(
  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
  Type *CondTy = RetTy->getWithNewBitWidth(1);
  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
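
When the target has no funnel-shift instruction, fshl/fshr is expanded into two shifts, an or, a subtract and a modulo on the shift amount, plus a compare and select to guard the zero-amount case; the cost above just totals those pieces. The expansion being priced, sketched for a 32-bit fshl:

#include <cassert>
#include <cstdint>

// Expansion priced by the cost model: urem, icmp+select guard, shl, sub, lshr, or.
static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t Amt = Z % BW;                 // URem
  if (Amt == 0)                          // ICmp + Select: a zero rotate must not shift by BW
    return X;
  return (X << Amt) | (Y >> (BW - Amt)); // Shl, Sub, LShr, Or
}

int main() { assert(fshl32(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au); }
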
case Intrinsic::get_active_lane_mask: {
  if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
  thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
case Intrinsic::experimental_cttz_elts: {
  if (!getTLI()->shouldExpandCttzElements(ArgType))
  bool ZeroIsPoison = !cast<ConstantInt>(Args[1])->isZero();
  if (isa<ScalableVectorType>(ICA.getArgTypes()[0]) && I && I->getCaller())
  NewEltTy, cast<VectorType>(Args[0]->getType())->getElementCount());
  thisT()->getIntrinsicInstrCost(StepVecAttrs, CostKind);
  thisT()->getArithmeticInstrCost(Instruction::Sub, NewVecTy, CostKind);
  Cost += thisT()->getCastInstrCost(Instruction::SExt, NewVecTy,
  thisT()->getArithmeticInstrCost(Instruction::And, NewVecTy, CostKind);
  NewEltTy, NewVecTy, FMF, I, 1);
  Cost += thisT()->getTypeBasedIntrinsicInstrCost(ReducAttrs, CostKind);
  thisT()->getArithmeticInstrCost(Instruction::Sub, NewEltTy, CostKind);
case Intrinsic::experimental_vector_match:
  return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);

ScalarizationCost = 0;
if (!RetTy->isVoidTy())
  cast<VectorType>(RetTy),
ScalarizationCost +=
return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);

unsigned VecTyIndex = 0;
if (IID == Intrinsic::vector_reduce_fadd ||
    IID == Intrinsic::vector_reduce_fmul)
assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
      return isa<ScalableVectorType>(Ty);
SkipScalarizationCost ? ScalarizationCostPassed : 0;
unsigned ScalarCalls = 1;
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
  if (!SkipScalarizationCost)
  ScalarCalls = std::max(ScalarCalls,
  ScalarRetTy = RetTy->getScalarType();
for (Type *Ty : Tys) {
  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    if (!SkipScalarizationCost)
    ScalarCalls = std::max(ScalarCalls,
if (ScalarCalls == 1)
thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
return ScalarCalls * ScalarCost + ScalarizationCost;
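
The scalarization fallback above prices a vector intrinsic as ScalarCalls copies of the scalar call plus the insert/extract overhead of unpacking the operands and repacking the result. In toy form, with unit insert/extract costs standing in for the target's real values (the helper name is made up):

#include <cassert>

// ScalarCalls * ScalarCost + packing overhead
// (VF extracts per vector operand, VF inserts for the result).
static int scalarizedIntrinsicCost(int VF, int NumVecOperands, int ScalarCallCost) {
  int ScalarizationCost = VF * NumVecOperands /*extracts*/ + VF /*inserts*/;
  return VF * ScalarCallCost + ScalarizationCost;
}

int main() { assert(scalarizedIntrinsicCost(4, 2, 1) == 16); }
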
case Intrinsic::sqrt:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::sincos:
case Intrinsic::tan:
case Intrinsic::asin:
case Intrinsic::acos:
case Intrinsic::atan:
case Intrinsic::atan2:
case Intrinsic::sinh:
case Intrinsic::cosh:
case Intrinsic::tanh:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::exp10:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
case Intrinsic::canonicalize:
case Intrinsic::minnum:
case Intrinsic::maxnum:
case Intrinsic::minimum:
case Intrinsic::maximum:
case Intrinsic::minimumnum:
case Intrinsic::maximumnum:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::nearbyint:
case Intrinsic::rint:
case Intrinsic::lrint:
case Intrinsic::llrint:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::experimental_constrained_fmuladd:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::sideeffect:
case Intrinsic::pseudoprobe:
case Intrinsic::arithmetic_fence:
case Intrinsic::masked_store: {
  Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
  return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
case Intrinsic::masked_load: {
  Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
  return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
  return thisT()->getArithmeticReductionCost(
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul:
  return thisT()->getArithmeticReductionCost(
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fmaximum:
case Intrinsic::vector_reduce_fminimum:
case Intrinsic::experimental_vector_match: {
  auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]);
  auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
  unsigned SearchSize = NeedleTy->getNumElements();
  if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
  Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy,
  Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
  thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
  thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
case Intrinsic::abs:
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat:
case Intrinsic::smul_fix:
case Intrinsic::umul_fix:
case Intrinsic::sadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::bswap:
case Intrinsic::bitreverse:
case Intrinsic::ucmp:
case Intrinsic::scmp:
auto *ST = dyn_cast<StructType>(RetTy);
Type *LegalizeTy = ST ? ST->getContainedType(0) : RetTy;
if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
return (LT.first * 2);
return (LT.first * 1);
return (LT.first * 2);
case Intrinsic::fmuladd: {
  return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
         thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
case Intrinsic::experimental_constrained_fmuladd: {
  Intrinsic::experimental_constrained_fmul, RetTy, Tys);
  Intrinsic::experimental_constrained_fadd, RetTy, Tys);
  return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
         thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
case Intrinsic::smin:
case Intrinsic::smax:
case Intrinsic::umin:
case Intrinsic::umax: {
  Type *CondTy = RetTy->getWithNewBitWidth(1);
  bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
case Intrinsic::sadd_with_overflow:
case Intrinsic::ssub_with_overflow: {
  Type *SumTy = RetTy->getContainedType(0);
  Type *OverflowTy = RetTy->getContainedType(1);
  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
                        ? BinaryOperator::Add
                        : BinaryOperator::Sub;
  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
  2 * thisT()->getCmpSelInstrCost(Instruction::ICmp, SumTy, OverflowTy,
  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
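
Signed add/sub-with-overflow is priced as the add or sub itself plus two compares and an xor. One standard expansion that matches that count, sketched for 32-bit addition (the exact sequence a target emits may differ):

#include <cassert>
#include <cstdint>

// Priced as: Add, ICmp, ICmp, Xor.
static bool saddOverflow32(int32_t A, int32_t B, int32_t &Sum) {
  Sum = (int32_t)((uint32_t)A + (uint32_t)B); // the Add itself (wrapping)
  bool BIsNeg = B < 0;                        // ICmp #1
  bool SumLtA = Sum < A;                      // ICmp #2
  return BIsNeg != SumLtA;                    // Xor: adding a negative must shrink the sum
}

int main() {
  int32_t S;
  assert(!saddOverflow32(1, 2, S) && S == 3);
  assert(saddOverflow32(INT32_MAX, 1, S));
}
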
case Intrinsic::uadd_with_overflow:
case Intrinsic::usub_with_overflow: {
  Type *SumTy = RetTy->getContainedType(0);
  Type *OverflowTy = RetTy->getContainedType(1);
  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
                        ? BinaryOperator::Add
                        : BinaryOperator::Sub;
  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow: {
  Type *MulTy = RetTy->getContainedType(0);
  Type *OverflowTy = RetTy->getContainedType(1);
  bool IsSigned = IID == Intrinsic::smul_with_overflow;
  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
  Cost += thisT()->getArithmeticInstrCost(
  Cost += thisT()->getArithmeticInstrCost(
      Instruction::AShr, MulTy, CostKind,
  Cost += thisT()->getCmpSelInstrCost(
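
The multiply-with-overflow expansion priced here widens both operands to twice the bit width, multiplies, truncates the low half back, and checks that the wide product still equals the re-extended truncation; the real lowering performs that check with a shift and compare. A 32-bit sketch using 64-bit arithmetic:

#include <cassert>
#include <cstdint>

// Priced as: two extends, a wide multiply, a truncate, a shift and a compare.
static bool smulOverflow32(int32_t A, int32_t B, int32_t &Lo) {
  int64_t Wide = (int64_t)A * (int64_t)B; // SExt, SExt, Mul
  Lo = (int32_t)Wide;                     // Trunc
  return Wide != (int64_t)Lo;             // overflow iff the product does not fit in 32 bits
}

int main() {
  int32_t Lo;
  assert(!smulOverflow32(1000, 1000, Lo) && Lo == 1000000);
  assert(smulOverflow32(100000, 100000, Lo));
}
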
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat: {
  Type *CondTy = RetTy->getWithNewBitWidth(1);
      ? Intrinsic::sadd_with_overflow
      : Intrinsic::ssub_with_overflow;
  nullptr, ScalarizationCostPassed);
  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
  Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat: {
  Type *CondTy = RetTy->getWithNewBitWidth(1);
      ? Intrinsic::uadd_with_overflow
      : Intrinsic::usub_with_overflow;
  nullptr, ScalarizationCostPassed);
  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
case Intrinsic::smul_fix:
case Intrinsic::umul_fix: {
  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
      IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
  Cost += thisT()->getArithmeticInstrCost(
  Cost += thisT()->getArithmeticInstrCost(
case Intrinsic::abs: {
  Type *CondTy = RetTy->getWithNewBitWidth(1);
  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
  Cost += thisT()->getArithmeticInstrCost(
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat: {
  Type *FromTy = Tys[0];
  bool IsSigned = IID == Intrinsic::fptosi_sat;
  Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
  Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
  Cost += thisT()->getCastInstrCost(
      IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
  Type *CondTy = RetTy->getWithNewBitWidth(1);
  Cost += thisT()->getCmpSelInstrCost(
  Cost += thisT()->getCmpSelInstrCost(
case Intrinsic::ucmp:
case Intrinsic::scmp: {
  Type *CmpTy = Tys[0];
  Type *CondTy = RetTy->getWithNewBitWidth(1);
  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, CmpTy, CondTy,
  Cost += 2 * thisT()->getCmpSelInstrCost(
                  BinaryOperator::Select, RetTy, CondTy,
  2 * thisT()->getCastInstrCost(CastInst::ZExt, RetTy, CondTy,
  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
  if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
        return isa<ScalableVectorType>(Ty);
  SkipScalarizationCost
      ? ScalarizationCostPassed
  unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
  for (Type *Ty : Tys) {
  thisT()->getIntrinsicInstrCost(Attrs, CostKind);
  for (Type *Ty : Tys) {
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      ScalarCalls = std::max(ScalarCalls,
  return ScalarCalls * ScalarCost + ScalarizationCost;
return SingleCallCost;

if (!LT.first.isValid())
if (auto *FTp = dyn_cast<FixedVectorType>(Tp);
    Tp && LT.second.isFixedLengthVector() &&
  if (auto *SubTp = dyn_cast_if_present<FixedVectorType>(
    return divideCeil(FTp->getNumElements(), SubTp->getNumElements());
return *LT.first.getValue();

if (isa<ScalableVectorType>(Ty))
unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
  return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
         thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
unsigned NumReduxLevels = Log2_32(NumVecElts);
std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
unsigned LongVectorCount = 0;
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
  ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
NumReduxLevels -= LongVectorCount;
NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
return ShuffleCost + ArithCost +
       thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,

if (isa<ScalableVectorType>(Ty))
auto *VTy = cast<FixedVectorType>(Ty);
return ExtractCost + ArithCost;
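
The tree reduction above halves the vector Log2_32(NumVecElts) times, paying a shuffle plus one vector-wide arithmetic op per level and a final extract of lane 0; the ordered (strict, in-order) variant instead pays roughly one scalar op per element plus the element extracts. A scalar model of the pairwise halving, assuming a power-of-two element count (helper name invented):

#include <cassert>
#include <vector>

// Pairwise tree reduction: log2(N) "shuffle + add" levels, then read lane 0.
static int treeReduceAdd(std::vector<int> V) {
  for (size_t Width = V.size() / 2; Width >= 1; Width /= 2) {
    for (size_t I = 0; I < Width; ++I)
      V[I] += V[I + Width]; // one vector-wide add per level in the real lowering
    V.resize(Width);
  }
  return V[0];              // ExtractElement of lane 0
}

int main() { assert(treeReduceAdd({1, 2, 3, 4, 5, 6, 7, 8}) == 36); }
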
std::optional<FastMathFlags> FMF,
assert(Ty && "Unknown reduction vector type");
if (isa<ScalableVectorType>(Ty))
unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
unsigned LongVectorCount = 0;
LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
while (NumVecElts > MVTLen) {
NumReduxLevels -= LongVectorCount;
return ShuffleCost + MinMaxCost +
       thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,

if (auto *FTy = dyn_cast<FixedVectorType>(Ty);
    FTy && IsUnsigned && Opcode == Instruction::Add &&
  return thisT()->getCastInstrCost(Instruction::BitCast, IntTy, FTy,
         thisT()->getIntrinsicInstrCost(ICA, CostKind);
thisT()->getArithmeticReductionCost(Opcode, ExtTy, FMF, CostKind);
IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
return RedCost + ExtCost;

Instruction::Add, ExtTy, std::nullopt, CostKind);
IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
return RedCost + MulCost + 2 * ExtCost;
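
The multiply-accumulate reduction is priced as an add reduction in the wider type plus one widening multiply and two extends, matching a dot-product style lowering that accumulates ext(a[i]) * ext(b[i]). A scalar sketch (names made up for illustration):

#include <cassert>
#include <cstdint>
#include <vector>

// ext(a[i]) * ext(b[i]) accumulated in the wider type: two extends, a multiply, an add reduction.
static uint32_t mulAccReduce(const std::vector<uint8_t> &A, const std::vector<uint8_t> &B) {
  uint32_t Acc = 0;
  for (size_t I = 0; I < A.size(); ++I)
    Acc += (uint32_t)A[I] * (uint32_t)B[I];
  return Acc;
}

int main() { assert(mulAccReduce({1, 2, 3}, {4, 5, 6}) == 32); }
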
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
bool sgt(const APInt &RHS) const
Signed greater than comparison.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool slt(const APInt &RHS) const
Signed less than comparison.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
an instruction to allocate memory on the stack
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
size_t size() const
size - Get the array size.
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Base class which can be used to help build a TTI implementation.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx)
bool isTypeLegal(Type *Ty)
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
virtual unsigned getPrefetchDistance() const
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
unsigned getMaxInterleaveFactor(ElementCount VF)
unsigned getNumberOfParts(Type *Tp)
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
std::optional< unsigned > getVScaleForTuning() const
InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate the cost of performing strict (in-order) reductions, which involves doing a sequence...
bool isNumRegsMajorCostOfLSR()
bool isTruncateFree(Type *Ty1, Type *Ty2)
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I)
bool isLegalICmpImmediate(int64_t imm)
bool isProfitableToHoist(Instruction *I)
virtual unsigned getMaxPrefetchIterationsAhead() const
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index)
std::optional< unsigned > getMaxVScale() const
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *Ty, int &Index, VectorType *&SubTy) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
unsigned getRegUsageForType(Type *Ty)
bool shouldBuildRelLookupTables() const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
bool shouldDropLSRSolutionIfLessProfitable() const
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
bool hasBranchDivergence(const Function *F=nullptr)
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
unsigned getAssumedAddrSpace(const Value *V) const
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
unsigned getEpilogueVectorizationMinVF()
int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset)
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
virtual std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
bool isAlwaysUniform(const Value *V)
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true)
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
virtual std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
virtual bool enableWritePrefetching() const
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
InstructionCost getFPOpCost(Type *Ty)
InstructionCost getVectorSplitCost()
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
bool haveFastSqrt(Type *Ty)
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
unsigned getInliningThresholdMultiplier() const
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
virtual ~BasicTTIImplBase()=default
bool isLegalAddScalableImmediate(int64_t Imm)
InstructionCost getScalarizationOverhead(VectorType *RetTy, ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing the inputs and outputs of an instruction, with return type RetTy...
bool isVScaleKnownToBeAPowerOfTwo() const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
bool isLegalAddImmediate(int64_t imm)
bool shouldBuildLookupTables()
unsigned getFlatAddressSpace()
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, ArrayRef< Value * > VL={})
Estimate the overhead of scalarizing an instruction.
virtual unsigned getCacheLineSize() const
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
bool isSourceOfDivergence(const Value *V)
bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
int getInlinerVectorBonusPercent() const
InstructionCost getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on argument types.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr)
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0)
bool isSingleThreaded() const
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
unsigned adjustInliningThreshold(const CallBase *CB)
bool isProfitableLSRChainElement(Instruction *I)
Concrete BasicTTIImpl that can be used if no further customization is needed.
size_type count() const
count - Returns the number of bits which are set.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
CmpInst::Predicate getLTPredicate() const
CmpInst::Predicate getGTPredicate() const
This class represents a range of values.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getFixed(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Convenience struct for specifying and reasoning about fast-math flags.
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
AttributeList getAttributes() const
Return the attribute list for this Function.
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
std::optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
bool skipScalarizationCost() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Return the minimum stride necessary to trigger software prefetching.
virtual bool enableWritePrefetching() const
virtual unsigned getMaxPrefetchIterationsAhead() const
Return the maximum prefetch distance in terms of loop iterations.
virtual unsigned getPrefetchDistance() const
Return the preferred prefetch distance in terms of instructions.
virtual std::optional< unsigned > getCacheAssociativity(unsigned Level) const
Return the cache associativity for the given level of cache.
virtual std::optional< unsigned > getCacheLineSize(unsigned Level) const
Return the target cache line size in bytes at a given level.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Analysis providing profile information.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
static bool isZeroEltSplatMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses all elements with the same value as the first element of exa...
static bool isSpliceMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is a splice mask, concatenating the two inputs together and then ext...
static bool isSelectMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static bool isReverseMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
static bool isTransposeMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask is a transpose mask.
static bool isInsertSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &NumSubElts, int &Index)
Return true if this shuffle mask is an insert subvector mask.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Provides information about what library functions are available for the current target.
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
@ TypeScalarizeScalableVector
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
unsigned getBitWidthForCttzElements(Type *RetTy, ElementCount EC, bool ZeroIsPoison, const ConstantRange *VScaleRange) const
Return the minimum number of bits required to hold the maximum possible number of trailing zero vecto...
virtual bool shouldExpandCmpUsingSelects(EVT VT) const
Should we expand [US]CMP nodes using two selects and two compares, or by doing arithmetic on boolean ...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const
Return true if integer divide is usually cheaper than a sequence of several shifts,...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
virtual bool isProfitableToHoist(Instruction *I) const
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool isLegalAddScalableImmediate(int64_t) const
Return true if adding the specified scalable immediate is legal, that is the target has add instructi...
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
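As a sketch (Ctx and TLI assumed in scope), getTypeConversion returns this pair: the action to take and the type that action produces.
// i48 is not a native register type on most targets; the pair says what to
// do (e.g. promote) and which EVT results (e.g. i64).
EVT OddVT = EVT::getIntegerVT(Ctx, 48);
TargetLoweringBase::LegalizeKind LK = TLI->getTypeConversion(Ctx, OddVT);
// LK.first  -> the LegalizeTypeAction to perform
// LK.second -> the EVT produced by that action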
Primary interface to the complete machine description for the target machine.
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
If the specified predicate checks whether a generic pointer falls within a specified address space,...
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
virtual unsigned getAssumedAddrSpace(const Value *V) const
If the specified generic pointer could be assumed as a pointer to a specific address space,...
ThreadModel::Model ThreadModel
ThreadModel - This flag specifies the type of threading model to assume for things like atomics.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
Triple - Helper class for working with autoconf configuration names.
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isArch64Bit() const
Test whether the architecture is 64-bit.
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitWidth, whilst keeping the old numb...
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
static bool isVPBinOp(Intrinsic::ID ID)
static bool isVPCast(Intrinsic::ID ID)
static bool isVPCmp(Intrinsic::ID ID)
static std::optional< unsigned > getFunctionalOpcodeForVP(Intrinsic::ID ID)
static std::optional< Intrinsic::ID > getFunctionalIntrinsicIDForVP(Intrinsic::ID ID)
static bool isVPIntrinsic(Intrinsic::ID)
static bool isVPReduction(Intrinsic::ID ID)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
Base class of all SIMD vector types.
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Type * getElementType() const
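A brief sketch of constructing vector types with these helpers; Ctx is an assumed LLVMContext.
// <8 x i32> as a fixed-width vector, and its scalable counterpart.
auto *V8I32 =
    VectorType::get(Type::getInt32Ty(Ctx), ElementCount::getFixed(8));
auto *NxV8I32 =
    VectorType::get(Type::getInt32Ty(Ctx), ElementCount::getScalable(8));
// Same element type, half the element count: <4 x i32>.
auto *V4I32 = VectorType::getHalfElementsVectorType(V8I32);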
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth bits.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
@ BSWAP
Byte Swap and Counting operators.
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ FADD
Simple binary floating point operators.
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ SSUBO
Same for subtraction.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ SMULO
Same for multiplication.
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
ID ArrayRef< Type * > Tys
bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
DiagnosticInfoOptimizationBase::Argument NV
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
constexpr bool has_single_bit(T Value) noexcept
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
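For illustration, these range helpers over a hypothetical shuffle mask (assumes llvm/ADT/SmallVector.h, llvm/ADT/STLExtras.h, and llvm/Support/raw_ostream.h for errs(); PoisonMaskElem is the -1 sentinel listed below):
SmallVector<int, 8> Mask = {0, 1, PoisonMaskElem, 3};
// Walk the mask with its lane index available.
for (auto P : enumerate(Mask))
  if (P.value() == PoisonMaskElem)
    errs() << "poison lane at index " << P.index() << "\n";
bool NoPoison = all_of(Mask, [](int M) { return M != PoisonMaskElem; });  // false
bool HasPoison = any_of(Mask, [](int M) { return M == PoisonMaskElem; }); // true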
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
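A few worked values for these math helpers (assumes llvm/Support/MathExtras.h and llvm/ADT/bit.h are available):
static_assert(llvm::isPowerOf2_32(64), "64 is a power of two > 0");
static_assert(llvm::has_single_bit(64u), "same property, std::bit-style name");
unsigned FloorLog = llvm::Log2_32(40);       // 5, since 2^5 <= 40 < 2^6
unsigned Chunks = llvm::divideCeil(10u, 4u); // 3, the quotient rounded up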
constexpr unsigned BitWidth
cl::opt< unsigned > PartialUnrollingThreshold
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
ElementCount getVectorElementCount() const
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
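A short sketch of round-tripping between IR types and EVTs; Ctx is an assumed LLVMContext.
EVT OddVT = EVT::getEVT(Type::getIntNTy(Ctx, 48)); // EVT for an IR i48
// i48 has no MVT, so OddVT.isSimple() is false and getSimpleVT() would
// assert; type legalization (getTypeConversion above) decides its fate.
Type *RoundTrip = OddVT.getTypeForEVT(Ctx); // back to the i48 IR type
EVT I64 = EVT::getIntegerVT(Ctx, 64);       // a simple EVT, i.e. MVT::i64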
Attributes of a target dependent hardware loop.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...