24#include "llvm/IR/IntrinsicsAMDGPU.h"
32#include "AMDGPUGenCallingConv.inc"
35 "amdgpu-bypass-slow-div",
36 cl::desc(
"Skip 64-bit divide for dynamic 32-bit values"),
45 assert(StoreSize % 32 == 0 &&
"Store size not a multiple of 32");
165 {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16})
379 {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32,
380 MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
381 MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64},
388 {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32,
389 MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
390 MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32,
391 MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32,
392 MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
398 {MVT::v2f16, MVT::v2bf16, MVT::v2i16, MVT::v4f16, MVT::v4bf16,
399 MVT::v4i16, MVT::v2f32, MVT::v2i32, MVT::v3f32, MVT::v3i32,
400 MVT::v4f32, MVT::v4i32, MVT::v5f32, MVT::v5i32, MVT::v6f32,
401 MVT::v6i32, MVT::v7f32, MVT::v7i32, MVT::v8f32, MVT::v8i32,
402 MVT::v9f32, MVT::v9i32, MVT::v10i32, MVT::v10f32, MVT::v11i32,
403 MVT::v11f32, MVT::v12i32, MVT::v12f32, MVT::v16f16, MVT::v16bf16,
404 MVT::v16i16, MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32,
405 MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64,
406 MVT::v4i64, MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64,
407 MVT::v32i16, MVT::v32f16, MVT::v32bf16},
413 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
414 for (
MVT VT : ScalarIntVTs) {
453 for (
auto VT : {MVT::i8, MVT::i16})
457 MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32,
458 MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32};
460 for (
MVT VT : VectorIntTypes) {
478 MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32,
479 MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32};
481 for (
MVT VT : FloatVectorTypes) {
531 for (
int I = 0;
I < RTLIB::UNKNOWN_LIBCALL; ++
I) {
532 if (I < RTLIB::ATOMIC_LOAD || I > RTLIB::ATOMIC_FETCH_NAND_16)
592 const auto Flags =
Op.getNode()->getFlags();
593 if (Flags.hasNoSignedZeros())
642 unsigned Opc =
N->getOpcode();
663 return (
N->getNumOperands() > 2 &&
N->getOpcode() !=
ISD::SELECT) ||
672 return N->getValueType(0) == MVT::f32;
679 if (isa<MemSDNode>(
N))
682 switch (
N->getOpcode()) {
697 switch (
N->getConstantOperandVal(0)) {
698 case Intrinsic::amdgcn_interp_p1:
699 case Intrinsic::amdgcn_interp_p2:
700 case Intrinsic::amdgcn_interp_mov:
701 case Intrinsic::amdgcn_interp_p1_f16:
702 case Intrinsic::amdgcn_interp_p2_f16:
722 unsigned NumMayIncreaseSize = 0;
723 MVT VT =
N->getValueType(0).getScalarType().getSimpleVT();
728 for (
const SDNode *U :
N->uses()) {
763 bool ForCodeSize)
const {
765 return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
772 return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
789 EVT OldVT =
N->getValueType(0);
797 if (OldSize >= 32 && NewSize < 32 && MN->
getAlign() >=
Align(4) &&
812 return (OldSize < 32);
827 if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
832 CastTy, MMO, &
Fast) &&
848 switch (
N->getOpcode()) {
853 unsigned IntrID =
N->getConstantOperandVal(0);
855 case Intrinsic::amdgcn_readfirstlane:
856 case Intrinsic::amdgcn_readlane:
876 switch (
Op.getOpcode()) {
886 EVT VT =
Op.getValueType();
911 return VT == MVT::f32 || VT == MVT::f64 ||
919 return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16;
943 unsigned SrcSize = Source.getSizeInBits();
946 return DestSize < SrcSize && DestSize % 32 == 0 ;
952 unsigned SrcSize = Source->getScalarSizeInBits();
956 return SrcSize >= 32;
958 return DestSize < SrcSize && DestSize % 32 == 0;
962 unsigned SrcSize = Src->getScalarSizeInBits();
966 return DestSize >= 32;
968 return SrcSize == 32 && DestSize == 64;
978 return Dest == MVT::i32 ||Dest == MVT::i64 ;
980 return Src == MVT::i32 && Dest == MVT::i64;
997 "Expected shift op");
1005 if (
N->getValueType(0) == MVT::i32 &&
N->use_size() == 1 &&
1006 (
N->use_begin()->getOpcode() ==
ISD::SRA ||
1007 N->use_begin()->getOpcode() ==
ISD::SRL))
1014 auto *RHSLd = dyn_cast<LoadSDNode>(
RHS);
1015 auto *LHS0 = dyn_cast<LoadSDNode>(
LHS.getOperand(0));
1016 auto *LHS1 = dyn_cast<ConstantSDNode>(
LHS.getOperand(1));
1017 return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() ==
ISD::ZEXTLOAD &&
1018 LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() &&
1023 return !(IsShiftAndLoad(
LHS,
RHS) || IsShiftAndLoad(
RHS,
LHS));
1043 return CC_AMDGPU_CS_CHAIN;
1047 return CC_AMDGPU_Func;
1072 return RetCC_SI_Shader;
1074 return RetCC_SI_Gfx;
1078 return RetCC_AMDGPU_Func;
1117 const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset();
1124 unsigned InIndex = 0;
1127 const bool IsByRef = Arg.hasByRefAttr();
1128 Type *BaseArgTy = Arg.getType();
1129 Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
1130 Align Alignment =
DL.getValueOrABITypeAlignment(
1131 IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy);
1132 MaxAlign = std::max(Alignment, MaxAlign);
1133 uint64_t AllocSize =
DL.getTypeAllocSize(MemArgTy);
1135 uint64_t ArgOffset =
alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
1136 ExplicitArgOffset =
alignTo(ExplicitArgOffset, Alignment) + AllocSize;
1149 for (
unsigned Value = 0, NumValues = ValueVTs.
size();
1187 }
else if (RegisterVT.
isVector()) {
1190 assert(MemoryBits % NumElements == 0);
1194 MemoryBits / NumElements);
1216 unsigned PartOffset = 0;
1217 for (
unsigned i = 0; i != NumRegs; ++i) {
1219 BasePartOffset + PartOffset,
1258 int ClobberedFI)
const {
1261 int64_t LastByte = FirstByte + MFI.
getObjectSize(ClobberedFI) - 1;
1270 if (
LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
1272 if (FI->getIndex() < 0) {
1274 int64_t InLastByte = InFirstByte;
1277 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
1278 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
1300 FuncName =
G->getSymbol();
1302 FuncName =
G->getGlobal()->getName();
1309 for (
unsigned I = 0,
E = CLI.
Ins.size();
I !=
E; ++
I)
1334 switch (
Op.getOpcode()) {
1338 "instruction is not implemented yet!");
1383 switch (
N->getOpcode()) {
1429 if (std::optional<uint32_t>
Address =
1442 Fn,
"local memory global used by non-kernel function",
1460 "Do not know what to do with an non-zero offset");
1476 EVT VT =
Op.getValueType();
1478 unsigned OpBitSize =
Op.getOperand(0).getValueType().getSizeInBits();
1479 if (OpBitSize >= 32 && OpBitSize % 32 == 0) {
1480 unsigned NewNumElt = OpBitSize / 32;
1481 EVT NewEltVT = (NewNumElt == 1) ? MVT::i32
1483 MVT::i32, NewNumElt);
1484 for (
const SDUse &U :
Op->ops()) {
1490 Args.push_back(NewIn);
1500 for (
const SDUse &U :
Op->ops())
1510 unsigned Start =
Op.getConstantOperandVal(1);
1511 EVT VT =
Op.getValueType();
1512 EVT SrcVT =
Op.getOperand(0).getValueType();
1517 assert(NumElt % 2 == 0 && NumSrcElt % 2 == 0 &&
"expect legal types");
1521 EVT NewVT = NumElt == 2
1633 if ((
LHS == True &&
RHS == False) || (
LHS == False &&
RHS == True))
1652 if (
LHS == NegTrue && CFalse && CRHS) {
1666std::pair<SDValue, SDValue>
1678 return std::pair(
Lo,
Hi);
1707 HiVT = NumElts - LoNumElts == 1
1710 return std::pair(LoVT, HiVT);
1715std::pair<SDValue, SDValue>
1717 const EVT &LoVT,
const EVT &HiVT,
1721 N.getValueType().getVectorNumElements() &&
1722 "More vector elements requested than available!");
1728 return std::pair(
Lo,
Hi);
1734 EVT VT =
Op.getValueType();
1746 SDValue BasePtr = Load->getBasePtr();
1747 EVT MemVT = Load->getMemoryVT();
1752 EVT LoMemVT, HiMemVT;
1760 Align BaseAlign = Load->getAlign();
1764 Load->getChain(), BasePtr, SrcValue, LoMemVT,
1765 BaseAlign, Load->getMemOperand()->getFlags());
1768 DAG.
getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
1770 HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
1794 EVT VT =
Op.getValueType();
1795 SDValue BasePtr = Load->getBasePtr();
1796 EVT MemVT = Load->getMemoryVT();
1799 Align BaseAlign = Load->getAlign();
1804 if (NumElements != 3 ||
1805 (BaseAlign <
Align(8) &&
1809 assert(NumElements == 3);
1816 Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
1817 WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
1828 SDValue Val = Store->getValue();
1836 EVT MemVT = Store->getMemoryVT();
1837 SDValue Chain = Store->getChain();
1838 SDValue BasePtr = Store->getBasePtr();
1842 EVT LoMemVT, HiMemVT;
1852 Align BaseAlign = Store->getAlign();
1857 DAG.
getTruncStore(Chain, SL,
Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
1858 Store->getMemOperand()->getFlags());
1861 HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
1872 EVT VT =
Op.getValueType();
1875 MVT IntVT = MVT::i32;
1876 MVT FltVT = MVT::f32;
1879 if (LHSSignBits < 9)
1883 if (RHSSignBits < 9)
1887 unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
1888 unsigned DivBits = BitSize - SignBits;
1932 bool UseFmadFtz =
false;
1933 if (Subtarget->
isGCN()) {
1988 EVT VT =
Op.getValueType();
1990 assert(VT == MVT::i64 &&
"LowerUDIVREM64 expects an i64");
2063 std::tie(Mulhi1_Lo, Mulhi1_Hi) =
2076 std::tie(Mulhi2_Lo, Mulhi2_Hi) =
2090 std::tie(Mul3_Lo, Mul3_Hi) = DAG.
SplitScalar(Mul3,
DL, HalfVT, HalfVT);
2169 for (
unsigned i = 0; i < halfBitWidth; ++i) {
2170 const unsigned bitPos = halfBitWidth - i - 1;
2201 EVT VT =
Op.getValueType();
2203 if (VT == MVT::i64) {
2209 if (VT == MVT::i32) {
2256 EVT VT =
Op.getValueType();
2264 if (VT == MVT::i32) {
2269 if (VT == MVT::i64 &&
2316 EVT VT =
Op.getValueType();
2317 auto Flags =
Op->getFlags();
2355 const unsigned FractBits = 52;
2356 const unsigned ExpBits = 11;
2372 assert(
Op.getValueType() == MVT::f64);
2382 const unsigned FractBits = 52;
2394 = DAG.
getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
2419 assert(
Op.getValueType() == MVT::f64);
2452 auto VT =
Op.getValueType();
2453 auto Arg =
Op.getOperand(0u);
2465 EVT VT =
Op.getValueType();
2516 switch (Src.getOpcode()) {
2518 return Src.getOperand(0).getValueType() == MVT::f16;
2523 unsigned IntrinsicID = Src.getConstantOperandVal(0);
2524 switch (IntrinsicID) {
2525 case Intrinsic::amdgcn_frexp_mant:
2540 if (Flags.hasApproximateFuncs())
2559 EVT VT = Src.getValueType();
2570 return IsLtSmallestNormal;
2576 EVT VT = Src.getValueType();
2589std::pair<SDValue, SDValue>
2610 return {ScaledInput, IsLtSmallestNormal};
2621 EVT VT =
Op.getValueType();
2625 if (VT == MVT::f16) {
2634 auto [ScaledInput, IsLtSmallestNormal] =
2657 EVT VT =
Op.getValueType();
2665 if (VT == MVT::f16 || Flags.hasApproximateFuncs() ||
2691 const float c_log10 = 0x1.344134p-2f;
2692 const float cc_log10 = 0x1.09f79ep-26f;
2695 const float c_log = 0x1.62e42ep-1f;
2696 const float cc_log = 0x1.efa39ep-25f;
2708 const float ch_log10 = 0x1.344000p-2f;
2709 const float ct_log10 = 0x1.3509f6p-18f;
2712 const float ch_log = 0x1.62e000p-1f;
2713 const float ct_log = 0x1.0bfbe8p-15f;
2730 const bool IsFiniteOnly = (Flags.hasNoNaNs() ||
Options.NoNaNsFPMath) &&
2731 (Flags.hasNoInfs() ||
Options.NoInfsFPMath);
2734 if (!IsFiniteOnly) {
2760 EVT VT = Src.getValueType();
2764 double Log2BaseInverted =
2767 if (VT == MVT::f32) {
2777 ScaledResultOffset, Zero, Flags);
2792 return DAG.
getNode(
ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand,
2801 EVT VT =
Op.getValueType();
2805 if (VT == MVT::f16) {
2850 EVT VT =
X.getValueType();
2858 SL, VT,
Mul, Flags);
2890 const EVT VT =
X.getValueType();
2940 EVT VT =
Op.getValueType();
3003 const float cc_exp = 0x1.4ae0bep-26f;
3004 const float c_exp10 = 0x1.a934f0p+1f;
3005 const float cc_exp10 = 0x1.2f346ep-24f;
3015 const float ch_exp = 0x1.714000p+0f;
3016 const float cl_exp = 0x1.47652ap-12f;
3018 const float ch_exp10 = 0x1.a92000p+1f;
3019 const float cl_exp10 = 0x1.4f0978p-11f;
3034 PL =
getMad(DAG, SL, VT, XH, CL, Mad0, Flags);
3049 DAG.
getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT);
3059 if (!Flags.hasNoInfs() && !
Options.NoInfsFPMath) {
3061 DAG.
getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);
3083 auto Arg =
Op.getOperand(0u);
3084 auto ResultVT =
Op.getValueType();
3086 if (ResultVT != MVT::i8 && ResultVT != MVT::i16)
3090 assert(ResultVT == Arg.getValueType());
3092 auto const LeadingZeroes = 32u - ResultVT.getFixedSizeInBits();
3093 auto SubVal = DAG.
getConstant(LeadingZeroes, SL, MVT::i32);
3095 NewOp = DAG.
getNode(
Op.getOpcode(), SL, MVT::i32, NewOp);
3110 bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;
3112 if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
3126 Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);
3146 OprLo = DAG.
getNode(AddOpc, SL, MVT::i32, OprLo, Const32);
3148 OprHi = DAG.
getNode(AddOpc, SL, MVT::i32, OprHi, Const32);
3265 if (Subtarget->
isGCN())
3308 EVT DestVT =
Op.getValueType();
3310 EVT SrcVT = Src.getValueType();
3312 if (SrcVT == MVT::i16) {
3313 if (DestVT == MVT::f16)
3322 if (DestVT == MVT::bf16) {
3329 if (SrcVT != MVT::i64)
3344 if (DestVT == MVT::f32)
3347 assert(DestVT == MVT::f64);
3353 EVT DestVT =
Op.getValueType();
3356 EVT SrcVT = Src.getValueType();
3358 if (SrcVT == MVT::i16) {
3359 if (DestVT == MVT::f16)
3368 if (DestVT == MVT::bf16) {
3375 if (SrcVT != MVT::i64)
3393 if (DestVT == MVT::f32)
3396 assert(DestVT == MVT::f64);
3405 EVT SrcVT = Src.getValueType();
3407 assert(SrcVT == MVT::f32 || SrcVT == MVT::f64);
3420 if (
Signed && SrcVT == MVT::f32) {
3433 if (SrcVT == MVT::f64) {
3435 llvm::bit_cast<double>(UINT64_C( 0x3df0000000000000)), SL,
3438 llvm::bit_cast<double>(UINT64_C( 0xc1f0000000000000)), SL,
3442 llvm::bit_cast<float>(UINT32_C( 0x2f800000)), SL, SrcVT);
3444 llvm::bit_cast<float>(UINT32_C( 0xcf800000)), SL, SrcVT);
3455 SL, MVT::i32, FloorMul);
3461 if (
Signed && SrcVT == MVT::f32) {
3491 const unsigned ExpMask = 0x7ff;
3492 const unsigned ExpBiasf64 = 1023;
3493 const unsigned ExpBiasf16 = 15;
3577 unsigned OpOpcode =
Op.getOpcode();
3578 EVT SrcVT = Src.getValueType();
3579 EVT DestVT =
Op.getValueType();
3582 if (SrcVT == MVT::f16 && DestVT == MVT::i16)
3585 if (SrcVT == MVT::bf16) {
3588 return DAG.
getNode(
Op.getOpcode(),
DL, DestVT, PromotedSrc);
3592 if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
3599 if (DestVT != MVT::i64)
3602 if (SrcVT == MVT::f16 ||
3609 return DAG.
getNode(Ext,
DL, MVT::i64, FpToInt32);
3612 if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
3620 EVT ExtraVT = cast<VTSDNode>(
Op.getOperand(1))->getVT();
3621 MVT VT =
Op.getSimpleValueType();
3635 for (
unsigned I = 0;
I < NElts; ++
I)
3650 EVT VT =
Op.getValueType();
3664 unsigned NewOpcode = Node24->
getOpcode();
3668 case Intrinsic::amdgcn_mul_i24:
3671 case Intrinsic::amdgcn_mul_u24:
3674 case Intrinsic::amdgcn_mulhi_i24:
3677 case Intrinsic::amdgcn_mulhi_u24:
3692 if (DemandedLHS || DemandedRHS)
3694 DemandedLHS ? DemandedLHS :
LHS,
3695 DemandedRHS ? DemandedRHS :
RHS);
3707template <
typename IntTy>
3710 if (Width +
Offset < 32) {
3712 IntTy Result =
static_cast<IntTy
>(Shl) >> (32 - Width);
3721 if (
MemSDNode *M = dyn_cast<MemSDNode>(U)) {
3722 if (M->isVolatile())
3870 EVT ExtVT = cast<VTSDNode>(N1)->getVT();
3874 EVT SrcVT = Src.getValueType();
3875 if (SrcVT.
bitsGE(ExtVT)) {
3886 unsigned IID =
N->getConstantOperandVal(0);
3888 case Intrinsic::amdgcn_mul_i24:
3889 case Intrinsic::amdgcn_mul_u24:
3890 case Intrinsic::amdgcn_mulhi_i24:
3891 case Intrinsic::amdgcn_mulhi_u24:
3893 case Intrinsic::amdgcn_fract:
3894 case Intrinsic::amdgcn_rsq:
3895 case Intrinsic::amdgcn_rcp_legacy:
3896 case Intrinsic::amdgcn_rsq_legacy:
3897 case Intrinsic::amdgcn_rsq_clamp: {
3900 return Src.isUndef() ? Src :
SDValue();
3902 case Intrinsic::amdgcn_frexp_exp: {
3908 if (PeekSign == Src)
3945 EVT VT =
N->getValueType(0);
3952 unsigned RHSVal =
RHS->getZExtValue();
3959 switch (
LHS->getOpcode()) {
3967 if (VT == MVT::i32 && RHSVal == 16 &&
X.getValueType() == MVT::i16 &&
3983 EVT XVT =
X.getValueType();
4013 if (
N->getValueType(0) != MVT::i64)
4022 unsigned RHSVal =
RHS->getZExtValue();
4048 auto *
RHS = dyn_cast<ConstantSDNode>(
N->getOperand(1));
4052 EVT VT =
N->getValueType(0);
4054 unsigned ShiftAmt =
RHS->getZExtValue();
4061 if (
auto *Mask = dyn_cast<ConstantSDNode>(
LHS.getOperand(1))) {
4062 unsigned MaskIdx, MaskLen;
4063 if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&
4064 MaskIdx == ShiftAmt) {
4098 EVT VT =
N->getValueType(0);
4103 SDValue Vec = Src.getOperand(0);
4123 if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) {
4145 EVT SrcVT = Src.getValueType();
4150 SDValue Amt = Src.getOperand(1);
4157 const unsigned MaxCstSize =
4192 return DAG.
getNode(MulOpc, SL, MVT::i32, N0, N1);
4216 EVT VT =
N->getValueType(0);
4222 if (!
N->isDivergent())
4244 if (V.hasOneUse() ||
all_of(V->uses(), [](
const SDNode *U) ->
bool {
4245 return U->getOpcode() == ISD::MUL;
4254 if (
SDValue MulOper = IsFoldableAdd(N0)) {
4259 if (
SDValue MulOper = IsFoldableAdd(N1)) {
4300 if (
N->getValueType(0) != MVT::i32)
4320 unsigned LoOpcode, HiOpcode;
4343 EVT VT =
N->getValueType(0);
4354 if (Subtarget->
hasSMulHi() && !
N->isDivergent())
4376 EVT VT =
N->getValueType(0);
4387 if (Subtarget->
hasSMulHi() && !
N->isDivergent())
4410 unsigned Opc)
const {
4411 EVT VT =
Op.getValueType();
4414 LegalVT != MVT::i16))
4451 return getFFBX_U32(DAG, CmpLHS, SL, Opc);
4462 return getFFBX_U32(DAG, CmpLHS, SL, Opc);
4480 return DAG.
getNode(
Op, SL, VT, NewSelect);
4498 EVT VT =
N.getValueType();
4525 bool ShouldFoldNeg =
true;
4530 ShouldFoldNeg =
false;
4532 ShouldFoldNeg =
false;
4535 if (ShouldFoldNeg) {
4559 Cond, NewLHS, NewRHS);
4561 return DAG.
getNode(
LHS.getOpcode(), SL, VT, NewSelect);
4577 EVT VT =
N->getValueType(0);
4585 if (
Cond.hasOneUse()) {
4595 getSetCCInverse(cast<CondCodeSDNode>(
CC)->
get(),
LHS.getValueType());
4697 EVT VT =
N->getValueType(0);
4816 for (
unsigned I = 0;
I < 3; ++
I)
4882 EVT SrcVT = Src.getValueType();
4918 Ops.
back() = CastBack;
4968 EVT SrcVT = Src.getValueType();
4982 const auto *CFP = dyn_cast<ConstantFPSDNode>(
N->getOperand(0));
4987 const APFloat &Val = CFP->getValueAPF();
4997 switch(
N->getOpcode()) {
5001 EVT DestVT =
N->getValueType(0);
5013 EVT SrcVT = Src.getValueType();
5049 const APInt &Val =
C->getValueAPF().bitcastToAPInt();
5107 assert(!
N->getValueType(0).isVector() &&
5108 "Vector handling of BFE not implemented");
5121 SDValue BitsFrom =
N->getOperand(0);
5126 if (OffsetVal == 0) {
5128 unsigned SignBits =
Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
5131 if (OpSignBits >= SignBits)
5151 return constantFoldBFE<int32_t>(DAG,
5152 CVal->getSExtValue(),
5158 return constantFoldBFE<uint32_t>(DAG,
5159 CVal->getZExtValue(),
5165 if ((OffsetVal + WidthVal) >= 32 &&
5166 !(Subtarget->
hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {
5169 BitsFrom, ShiftVal);
5175 OffsetVal + WidthVal);
5205 EVT VT =
N->getValueType(0);
5212 if (N0CFP && N1CFP && N2CFP) {
5213 const auto FTZ = [](
const APFloat &V) {
5214 if (V.isDenormal()) {
5215 APFloat Zero(V.getSemantics(), 0);
5216 return V.isNegative() ? -Zero : Zero;
5243 bool RawReg)
const {
5248 if (!
MRI.isLiveIn(Reg)) {
5249 VReg =
MRI.createVirtualRegister(RC);
5250 MRI.addLiveIn(Reg, VReg);
5252 VReg =
MRI.getLiveInVirtReg(Reg);
5314 assert(Arg &&
"Attempting to load missing argument");
5323 unsigned Mask = Arg.
getMask();
5324 unsigned Shift = llvm::countr_zero<unsigned>(Mask);
5336 alignTo(ExplicitKernArgSize, Alignment) + ExplicitArgOffset;
5356#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
5524 int &RefinementSteps,
5525 bool &UseOneConstNR,
5526 bool Reciprocal)
const {
5529 if (VT == MVT::f32) {
5530 RefinementSteps = 0;
5542 int &RefinementSteps)
const {
5545 if (VT == MVT::f32) {
5551 RefinementSteps = 0;
5563 case Intrinsic::amdgcn_workitem_id_x:
5565 case Intrinsic::amdgcn_workitem_id_y:
5567 case Intrinsic::amdgcn_workitem_id_z:
5580 unsigned Opc =
Op.getOpcode();
5623 LHSKnown = LHSKnown.
trunc(24);
5624 RHSKnown = RHSKnown.
trunc(24);
5629 unsigned MaxValBits = LHSValBits + RHSValBits;
5630 if (MaxValBits > 32)
5632 unsigned SignBits = 32 - MaxValBits + 1;
5640 if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
5642 else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
5647 unsigned MaxValBits = LHSValBits + RHSValBits;
5648 if (MaxValBits >= 32)
5663 for (
unsigned I = 0;
I < 32;
I += 8) {
5664 unsigned SelBits = Sel & 0xff;
5669 }
else if (SelBits < 7) {
5670 SelBits = (SelBits & 3) * 8;
5673 }
else if (SelBits == 0x0c) {
5674 Known.
Zero |= 0xFFull <<
I;
5675 }
else if (SelBits > 0x0c) {
5676 Known.
One |= 0xFFull <<
I;
5691 auto GA = cast<GlobalAddressSDNode>(
Op.getOperand(0).getNode());
5722 unsigned IID =
Op.getConstantOperandVal(0);
5724 case Intrinsic::amdgcn_workitem_id_x:
5725 case Intrinsic::amdgcn_workitem_id_y:
5726 case Intrinsic::amdgcn_workitem_id_z: {
5741 unsigned Depth)
const {
5742 switch (
Op.getOpcode()) {
5754 return std::max(SignBits, Op0SignBits);
5759 return Width ? 32 - (Width->
getZExtValue() & 0x1f) : 1;
5793 return std::min(Tmp0, std::min(Tmp1, Tmp2));
5803 unsigned Depth)
const {
5809 switch (
MI->getOpcode()) {
5810 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
5812 case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
5814 case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
5816 case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
5818 case AMDGPU::G_AMDGPU_SMED3:
5819 case AMDGPU::G_AMDGPU_UMED3: {
5820 auto [Dst, Src0, Src1, Src2] =
MI->getFirst4Regs();
5821 unsigned Tmp2 =
Analysis.computeNumSignBits(Src2, DemandedElts,
Depth + 1);
5824 unsigned Tmp1 =
Analysis.computeNumSignBits(Src1, DemandedElts,
Depth + 1);
5827 unsigned Tmp0 =
Analysis.computeNumSignBits(Src0, DemandedElts,
Depth + 1);
5830 return std::min(Tmp0, std::min(Tmp1, Tmp2));
5840 unsigned Depth)
const {
5841 unsigned Opcode =
Op.getOpcode();
5904 unsigned IntrinsicID =
Op.getConstantOperandVal(0);
5906 switch (IntrinsicID) {
5907 case Intrinsic::amdgcn_cubeid:
5910 case Intrinsic::amdgcn_frexp_mant: {
5915 case Intrinsic::amdgcn_cvt_pkrtz: {
5921 case Intrinsic::amdgcn_rcp:
5922 case Intrinsic::amdgcn_rsq:
5923 case Intrinsic::amdgcn_rcp_legacy:
5924 case Intrinsic::amdgcn_rsq_legacy:
5925 case Intrinsic::amdgcn_rsq_clamp: {
5932 case Intrinsic::amdgcn_trig_preop:
5933 case Intrinsic::amdgcn_fdot2:
5936 case Intrinsic::amdgcn_fma_legacy:
5953 return MRI.hasOneNonDBGUse(N0);
5966 if (
auto *IntTy = dyn_cast<IntegerType>(RMW->
getType())) {
5967 unsigned Size = IntTy->getBitWidth();
5982 using namespace PatternMatch;
5984 for (
auto &
Op :
I->operands()) {
5986 if (
any_of(Ops, [&](
Use *U) {
return U->get() ==
Op.get(); }))
5993 return !Ops.
empty();
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static LLVM_READONLY bool hasSourceMods(const MachineInstr &MI)
static bool isInv2Pi(const APFloat &APF)
static LLVM_READONLY bool opMustUseVOP3Encoding(const MachineInstr &MI, const MachineRegisterInfo &MRI)
returns true if the operation will definitely need to use a 64-bit encoding, and thus will use a VOP3...
static unsigned inverseMinMax(unsigned Opc)
static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, SelectionDAG &DAG)
static unsigned workitemIntrinsicDim(unsigned ID)
static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size, int64_t Offset)
static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, uint32_t Width, const SDLoc &DL)
static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X, SDValue Y, SDValue C, SDNodeFlags Flags=SDNodeFlags())
static SDValue getAddOneOp(const SDNode *V)
If V is an add of a constant 1, returns the other operand.
#define NODE_NAME_CASE(node)
static LLVM_READONLY bool selectSupportsSourceMods(const SDNode *N)
Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the type for ISD::SELECT.
static cl::opt< bool > AMDGPUBypassSlowDiv("amdgpu-bypass-slow-div", cl::desc("Skip 64-bit divide for dynamic 32-bit values"), cl::init(true))
static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, SDValue N0, SDValue N1, unsigned Size, bool Signed)
static bool fnegFoldsIntoOp(const SDNode *N)
static bool isI24(SDValue Op, SelectionDAG &DAG)
static bool isCttzOpc(unsigned Opc)
static bool isU24(SDValue Op, SelectionDAG &DAG)
static SDValue peekFPSignOps(SDValue Val)
static bool valueIsKnownNeverF32Denorm(SDValue Src)
Return true if it's known that Src can never be an f32 denormal value.
static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, unsigned Op, const SDLoc &SL, SDValue Cond, SDValue N1, SDValue N2)
static SDValue peekFNeg(SDValue Val)
static SDValue simplifyMul24(SDNode *Node24, TargetLowering::DAGCombinerInfo &DCI)
static bool isCtlzOpc(unsigned Opc)
static LLVM_READNONE bool fnegFoldsIntoOpcode(unsigned Opc)
static bool hasVolatileUser(SDNode *Val)
Interface definition of the TargetLowering class that is common to all AMD GPUs.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
Function Alias Analysis Results
block Block Frequency Analysis
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
static cl::opt< unsigned > CostThreshold("dfa-cost-threshold", cl::desc("Maximum cost accepted for the transformation"), cl::Hidden, cl::init(50))
static Error getAddrSpace(StringRef R, unsigned &AddrSpace)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Provides analysis for querying information about KnownBits during GISel passes.
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
static bool isUniformMMO(const MachineMemOperand *MMO)
uint64_t getExplicitKernArgSize() const
static std::optional< uint32_t > getLDSAbsoluteAddress(const GlobalValue &GV)
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)
bool isModuleEntryFunction() const
bool hasFminFmaxLegacy() const
Align getAlignmentForImplicitArgPtr() const
bool hasMadMacF32Insts() const
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const
Return the maximum workitem ID value in the function, for the given (0, 1, 2) dimension.
bool has16BitInsts() const
bool hasFastFMAF32() const
unsigned getExplicitKernelArgOffset() const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
bool hasInv2PiInlineImm() const
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG)
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType ExtendKind) const override
Return the type that should be used to zero or sign extend a zeroext/signext integer return value.
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Split a vector load into 2 loads of half the vector.
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const
SDValue storeStackInputValue(SelectionDAG &DAG, const SDLoc &SL, SDValue Chain, SDValue ArgVal, int64_t Offset) const
bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem, unsigned AS) const override
Return true if it is expected to be cheaper to do a store of vector constant with the given size and ...
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool shouldCombineMemoryType(EVT VT) const
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, unsigned Opc, SDValue LHS, uint32_t ValLo, uint32_t ValHi) const
Split the 64-bit value LHS into two 32-bit components, and perform the binary operation Opc to it wit...
SDValue lowerUnhandledCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals, StringRef Reason) const
SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isTruncateFree(EVT Src, EVT Dest) const override
bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const
TargetLowering::NegatibleCost getConstantNegateCost(const ConstantFPSDNode *C) const
SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, bool IsLog10, SDNodeFlags Flags) const
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isSDNodeAlwaysUniform(const SDNode *N) const override
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const
Split a vector store into multiple scalar stores.
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const
SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo &MFI, int ClobberedFI) const
bool isConstantCheaperToNegate(SDValue N) const
bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, Register N1) const override
static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src, SDNodeFlags Flags)
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
bool isConstantCostlierToNegate(SDValue N) const
SDValue loadInputValue(SelectionDAG &DAG, const TargetRegisterClass *RC, EVT VT, const SDLoc &SL, const ArgDescriptor &Arg) const
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const
SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags) const
Emit approx-funcs appropriate lowering for exp10.
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, SDValue RHS, DAGCombinerInfo &DCI) const
SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isSelectSupported(SelectSupportKind) const override
bool isZExtFree(Type *Src, Type *Dest) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const
SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const
bool mayIgnoreSignedZero(SDValue Op) const
SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const
bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG, const MachineMemOperand &MMO) const final
Return true if the following transform is beneficial: fold (conv (load x)) -> (load (conv*)x) On arch...
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT) const override
Return true if it is profitable to reduce a load to a smaller type.
MVT getVectorIdxTy(const DataLayout &) const override
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
std::pair< SDValue, SDValue > splitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HighVT, SelectionDAG &DAG) const
Split a vector value into two parts of types LoVT and HighVT.
SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const
SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, SDValue N) const
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Whether it is profitable to sink the operands of an Instruction I to the basic block of I.
SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const
bool isFAbsFree(EVT VT) const override
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
SDValue loadStackInputValue(SelectionDAG &DAG, EVT VT, const SDLoc &SL, int64_t Offset) const
Similar to CreateLiveInRegister, except value may be loaded from a stack slot rather than passed in a ...
bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override
Return true if it's profitable to narrow operations of type SrcVT to DestVT.
SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT)
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const override
Hooks for building estimates in place of slower divisions and square roots.
unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, const MachineRegisterInfo &MRI, unsigned Depth=0) const override
This method can be implemented by targets that want to expose additional information about sign bits ...
SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
static SDValue stripBitcast(SDValue Val)
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT, const SDLoc &SL, bool RawReg=false) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const
Split a vector store into 2 stores of half the vector.
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, NegatibleCost &Cost, unsigned Depth) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
std::pair< SDValue, SDValue > split64BitValue(SDValue Op, SelectionDAG &DAG) const
Return 64-bit value Op as two 32-bit integers.
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const override
Return a reciprocal estimate value for the input operand.
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI)
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
static CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg)
std::pair< SDValue, SDValue > getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, SDValue Op, SDNodeFlags Flags) const
If denormal handling is required return the scaled input to FLOG2, and the check for denormal range.
static CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg)
Selects the correct CCAssignFn for a given CallingConvention value.
static bool allUsesHaveSourceMods(const SDNode *N, unsigned CostThreshold=4)
SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN=false, unsigned Depth=0) const override
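If SNaN is false, returns true if Op is known to never be any NaN; if SNaN is true, returns true if Op is known to never be a signaling NaN.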
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG)
SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, SDNodeFlags Flags) const
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags)
bool ShouldShrinkFPConstant(EVT VT) const override
If true, then instruction selection should seek to shrink the FP constant of the specified type to a ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const
SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const
static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc)
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const
SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const
SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const
Widen a suitably aligned v3 load.
std::pair< EVT, EVT > getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const
Split a vector type into two parts.
SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const
SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
bool bitwiseIsEqual(const APFloat &RHS) const
opStatus add(const APFloat &RHS, roundingMode RM)
const fltSemantics & getSemantics() const
opStatus multiply(const APFloat &RHS, roundingMode RM)
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
void setHighBits(unsigned hiBits)
Set the top hiBits bits.
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
void setLowBits(unsigned loBits)
Set the bottom loBits bits.
This class represents an incoming formal argument to a Function.
an instruction that atomically reads a memory location, combines it with another value,...
@ FMin
*p = minnum(old, v); minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v); maxnum matches the behavior of llvm.maxnum.
BinOp getOperation() const
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
LLVMContext & getContext() const
void addLoc(const CCValAssign &V)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
const APFloat & getValueAPF() const
bool isNegative() const
Return true if the value is negative.
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
uint64_t getNumOperands() const
bool print(raw_ostream &OS, DIDumpOptions DumpOpts, const DWARFExpression *Expr, DWARFUnit *U) const
A parsed version of the target data layout string and methods for querying it.
Diagnostic information for unsupported feature in backend.
iterator_range< arg_iterator > args()
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Module * getParent()
Get the module that this global value is contained inside of...
This is an important class for using LLVM in a threaded context.
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
static auto integer_fixedlen_vector_valuetypes()
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
int getObjectIndexBegin() const
Return the minimum frame object index.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const
Returns the denormal handling type for the default rounding mode of the function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
op_iterator op_end() const
op_iterator op_begin() const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
SIModeRegisterDefaults getMode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
unsigned ComputeMaxSignificantBits(SDValue Op, unsigned Depth=0) const
Get the upper bound on bit size for this Value Op as a signed integer.
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getRegister(unsigned Reg, EVT VT)
bool isConstantValueOfAnyType(SDValue N) const
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
const TargetMachine & getTarget() const
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
bool isKnownNeverNaN(SDValue Op, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes=true)
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth)
Tells the code generator which bitwidths to bypass.
void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum fp convert the backend supports.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setSupportsUnalignedAtomics(bool UnalignedSupported)
Sets whether unaligned atomic operations are supported.
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
bool allowsMemoryAccessForAlignment(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
This function returns true if the memory access is aligned or if the target allows this specific unal...
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth=0) const
More limited version of SimplifyDemandedBits that can be used to "look through" ops that don't contri...
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const
Check to see if the specified operand of the specified instruction is a constant integer.
std::pair< SDValue, SDValue > expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Expands an unaligned load to 2 half-size loads for an integer, and possibly more for vectors.
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Primary interface to the complete machine description for the target machine.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ FIRST_MEM_OPCODE_NUMBER
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
@ BSWAP
Byte Swap and Counting operators.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ FADD
Simple binary floating point operators.
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SIGN_EXTEND
Conversion operators.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
@ EntryToken
EntryToken - This is the marker used to indicate the start of a region.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ INLINEASM
INLINEASM - Represents an inline asm block.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
bool match(Val *V, const Pattern &P)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool getAlign(const Function &F, unsigned index, unsigned &align)
ConstantFPSDNode * isConstOrConstSplatFP(SDValue N, bool AllowUndefs=false)
Returns the SDNode if it is a constant splat BuildVector or constant float.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< EVT > *MemVTs, SmallVectorImpl< TypeSize > *Offsets=nullptr, TypeSize StartingOffset=TypeSize::getZero())
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
APFloat neg(APFloat X)
Returns the negated value of the argument.
unsigned Log2(Align A)
Returns the log2 of the alignment.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
static const fltSemantics & IEEEsingle() LLVM_READNONE
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEdouble() LLVM_READNONE
static const fltSemantics & IEEEhalf() LLVM_READNONE
This struct is a compact representation of a valid (non-zero power of two) alignment.
MCRegister getRegister() const
unsigned getStackOffset() const
DenormalModeKind Input
Denormal treatment kind for floating point instruction inputs in the default floating-point environme...
@ PreserveSign
The sign of a flushed-to-zero number is preserved in the sign of 0.
static constexpr DenormalMode getPreserveSign()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
EVT getPow2VectorType(LLVMContext &Context) const
Widens the length of the given vector EVT up to the nearest power of 2 and returns that type.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
bool isByteSized() const
Return true if the bit size is a multiple of 8.
uint64_t getScalarSizeInBits() const
EVT getHalfSizedIntegerVT(LLVMContext &Context) const
Finds the smallest simple value type that is greater than or equal to half the width of this EVT.
bool isPow2VectorType() const
Returns true if the given vector type has a power-of-2 number of elements.
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
bool isVector() const
Return true if this is a vector value type.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
bool isUnknown() const
Returns true if we don't know any bits.
KnownBits trunc(unsigned BitWidth) const
Return known bits for a truncation of the value we're tracking.
unsigned getBitWidth() const
Get the bit width of this value.
void resetAll()
Resets the known state of all bits.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
bool isStrictlyPositive() const
Returns true if this value is known to be positive.
bool isNegative() const
Returns true if this value is known to be negative.
unsigned countMaxSignificantBits() const
Returns the maximum number of bits needed to represent all possible signed values with these known bi...
This class contains a discriminated union of information about pointers in memory operands,...
bool isDereferenceable(unsigned Size, LLVMContext &C, const DataLayout &DL) const
Return true if memory region [V, V+Offset+Size) is known to be dereferenceable.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowContract(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
DenormalMode FP32Denormals
If this is set, neither input nor output denormals are flushed for most f32 instructions.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
bool isBeforeLegalizeOps() const
CombineLevel getDAGCombineLevel()
void AddToWorklist(SDNode *N)
bool isCalledByLegalizer() const
bool isBeforeLegalize() const
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...