28#include "llvm/IR/IntrinsicsAMDGPU.h"
36#define DEBUG_TYPE "AMDGPUtti"
40struct AMDGPUImageDMaskIntrinsic {
44#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
45#include "AMDGPUGenSearchableTables.inc"
75 Type *VTy = V.getType();
84 APFloat FloatValue(ConstFloat->getValueAPF());
85 bool LosesInfo =
true;
94 APInt IntValue(ConstInt->getValue());
113 Type *VTy = V.getType();
139 Func(Args, OverloadTys);
151 bool RemoveOldIntr = &OldIntr != &InstToReplace;
160static std::optional<Instruction *>
165 if (
const auto *LZMappingInfo =
167 if (
auto *ConstantLod =
169 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
174 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
175 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
182 if (
const auto *MIPMappingInfo =
184 if (
auto *ConstantMip =
186 if (ConstantMip->isZero()) {
191 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
192 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
199 if (
const auto *BiasMappingInfo =
201 if (
auto *ConstantBias =
203 if (ConstantBias->isZero()) {
208 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
209 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
210 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
217 if (
const auto *OffsetMappingInfo =
219 if (
auto *ConstantOffset =
221 if (ConstantOffset->isZero()) {
224 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
226 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
227 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
234 if (ST->hasD16Images()) {
244 if (
II.hasOneUse()) {
247 if (
User->getOpcode() == Instruction::FPTrunc &&
251 [&](
auto &Args,
auto &ArgTys) {
254 ArgTys[0] = User->getType();
263 bool AllHalfExtracts =
true;
265 for (
User *U :
II.users()) {
267 if (!Ext || !Ext->hasOneUse()) {
268 AllHalfExtracts =
false;
273 if (!Tr || !Tr->getType()->isHalfTy()) {
274 AllHalfExtracts =
false;
281 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
292 OverloadTys[0] = HalfVecTy;
295 M, ImageDimIntr->
Intr, OverloadTys);
297 II.mutateType(HalfVecTy);
298 II.setCalledFunction(HalfDecl);
301 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
302 Value *Idx = Ext->getIndexOperand();
304 Builder.SetInsertPoint(Tr);
306 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
309 Tr->replaceAllUsesWith(HalfExtract);
312 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
323 if (!ST->hasA16() && !ST->hasG16())
330 bool FloatCoord =
false;
332 bool OnlyDerivatives =
false;
335 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
336 Value *Coord =
II.getOperand(OperandIndex);
339 if (OperandIndex < ImageDimIntr->CoordStart ||
344 OnlyDerivatives =
true;
353 if (!OnlyDerivatives && !ST->hasA16())
354 OnlyDerivatives =
true;
357 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
360 "Only image instructions with a sampler can have a bias");
362 OnlyDerivatives =
true;
365 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
373 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
374 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
375 if (!OnlyDerivatives) {
376 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
379 if (ImageDimIntr->NumBiasArgs != 0)
380 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
386 OperandIndex < EndIndex; OperandIndex++) {
388 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
393 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
394 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
423 Value *Src =
nullptr;
426 if (Src->getType()->isHalfTy())
443 unsigned VWidth = VTy->getNumElements();
446 for (
int i = VWidth - 1; i > 0; --i) {
468 unsigned VWidth = VTy->getNumElements();
474 SVI->getShuffleMask(ShuffleMask);
476 for (
int I = VWidth - 1;
I > 0; --
I) {
477 if (ShuffleMask.empty()) {
528 unsigned LaneArgIdx)
const {
529 unsigned MaskBits = ST->getWavefrontSizeLog2();
543 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
546 if (MaskedConst != LaneArg) {
547 II.getOperandUse(LaneArgIdx).set(MaskedConst);
559 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
575 if (ST.isWave32() &&
match(V, W32Pred))
577 if (ST.isWave64() &&
match(V, W64Pred))
586 const auto IID =
II.getIntrinsicID();
587 assert(IID == Intrinsic::amdgcn_readlane ||
588 IID == Intrinsic::amdgcn_readfirstlane ||
589 IID == Intrinsic::amdgcn_permlane64);
599 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
603 Value *LaneID =
nullptr;
605 LaneID =
II.getOperand(1);
619 const auto DoIt = [&](
unsigned OpIdx,
623 Ops.push_back(LaneID);
639 return DoIt(0,
II.getCalledFunction());
643 Type *SrcTy = Src->getType();
649 return DoIt(0, Remangled);
657 return DoIt(1,
II.getCalledFunction());
659 return DoIt(0,
II.getCalledFunction());
670 unsigned Depth = 0) {
680 return CI->getZExtValue();
689 std::optional<unsigned>
LHS =
693 std::optional<unsigned>
RHS =
702 return CI ? std::optional<unsigned>(CI->getZExtValue()) : std::nullopt;
710 unsigned WaveSize = ST.getWavefrontSize();
712 for (
unsigned Lane :
seq(WaveSize)) {
714 if (!Val || *Val >= WaveSize)
723template <
unsigned Period>
725 static_assert(
isPowerOf2_32(Period),
"Period must be a power of two");
726 for (
unsigned I = Period,
E = Ids.
size();
I <
E; ++
I)
727 if (Ids[
I] != Ids[
I % Period] + (
I & ~(Period - 1)))
735 for (
unsigned I = 0;
I <
N; ++
I)
751 return Ids[3] << 6 | Ids[2] << 4 | Ids[1] << 2 | Ids[0];
758 for (
unsigned J = 0; J <
N; ++J)
759 if (Ids[J] != (
N - 1) - J)
771 for (
unsigned J = 1; J < 16; ++J)
772 if (Ids[J] != (Ids[0] + J) % 16)
790 unsigned Mask = Ids[0];
793 for (
unsigned J = 0; J < 16; ++J)
794 if (Ids[J] != (Mask ^ J))
804 unsigned Selector = 0;
805 for (
unsigned J = 0; J < 8; ++J)
806 Selector |= Ids[J] << (J * 3);
815 for (
unsigned J = 0; J < 16; ++J)
816 Sel |=
static_cast<uint64_t>(Ids[J] & 0xF) << (J * 4);
823 if (Ids.
size() != 64)
825 for (
unsigned J = 0; J < 64; ++J)
826 if (Ids[J] != (J ^ 32))
837 for (
unsigned J = 0; J < 16; ++J) {
838 if (Ids[J] < 16 || Ids[J] >= 32)
840 if (Ids[J + 16] != Ids[J] - 16)
851static std::optional<unsigned>
860 unsigned AndMask = 0, OrMask = 0, XorMask = 0;
861 for (
unsigned B = 0;
B < 5; ++
B) {
862 unsigned Bit0 = (Ids[0] >>
B) & 1;
863 unsigned Bit1 = (Ids[1u <<
B] >>
B) & 1;
866 XorMask |= Bit0 <<
B;
874 for (
unsigned I :
seq(32u)) {
875 unsigned Expected = ((
I & AndMask) | OrMask) ^ XorMask;
891 return B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, {Ty},
893 B.getInt32(0xF),
B.getInt32(0xF),
B.getTrue()});
898 return B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp8, {Val->
getType()},
899 {Val,
B.getInt32(Selector)});
906 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane16, {Ty},
908 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
916 return B.CreateIntrinsic(Intrinsic::amdgcn_permlanex16, {Ty},
918 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
926 assert(
DL.getTypeSizeInBits(OrigTy) == 32 &&
927 "ds_swizzle only supports 32-bit operands");
931 Src =
B.CreatePtrToInt(Src, I32Ty);
932 else if (OrigTy != I32Ty)
933 Src =
B.CreateBitCast(Src, I32Ty);
934 Value *Result =
B.CreateIntrinsic(Intrinsic::amdgcn_ds_swizzle, {},
937 return B.CreateIntToPtr(Result, OrigTy);
939 return B.CreateBitCast(Result, OrigTy);
945 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {Val->
getType()},
975 if (ST.hasDPPRowShare()) {
980 if (ST.hasDPP() && ST.hasGFX10Insts()) {
990 if (ST.hasPermLaneX16()) {
1016static std::optional<Instruction *>
1020 if (
DL.getTypeSizeInBits(
II.getType()) != 32)
1021 return std::nullopt;
1023 if (!ST.isWaveSizeKnown())
1024 return std::nullopt;
1026 unsigned WaveSize = ST.getWavefrontSize();
1027 bool IsBpermute =
II.getIntrinsicID() == Intrinsic::amdgcn_ds_bpermute;
1028 Value *Src =
II.getArgOperand(IsBpermute ? 1 : 0);
1029 Value *Index =
II.getArgOperand(IsBpermute ? 0 : 1);
1034 for (
unsigned Lane :
seq(WaveSize)) {
1036 if (!Val || (*Val & 3) || (*Val >> 2) >= WaveSize)
1037 return std::nullopt;
1038 Ids[Lane] = *Val >> 2;
1042 return std::nullopt;
1047 return std::nullopt;
1052std::optional<Instruction *>
1056 case Intrinsic::amdgcn_implicitarg_ptr: {
1057 if (
II.getFunction()->hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
1059 uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*
II.getFunction());
1062 II.getAttributes().getRetDereferenceableOrNullBytes();
1063 if (CurrentOrNullBytes != 0) {
1066 uint64_t NewBytes = std::max(CurrentOrNullBytes, ImplicitArgBytes);
1069 II.removeRetAttr(Attribute::DereferenceableOrNull);
1073 uint64_t CurrentBytes =
II.getAttributes().getRetDereferenceableBytes();
1074 uint64_t NewBytes = std::max(CurrentBytes, ImplicitArgBytes);
1075 if (NewBytes != CurrentBytes) {
1081 return std::nullopt;
1083 case Intrinsic::amdgcn_rcp: {
1084 Value *Src =
II.getArgOperand(0);
1095 if (
II.isStrictFP())
1099 const APFloat &ArgVal =
C->getValueAPF();
1117 auto IID = SrcCI->getIntrinsicID();
1122 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
1132 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
1135 II.setFastMathFlags(InnerFMF);
1137 II.setCalledFunction(NewDecl);
1143 case Intrinsic::amdgcn_sqrt:
1144 case Intrinsic::amdgcn_rsq:
1145 case Intrinsic::amdgcn_tanh: {
1146 Value *Src =
II.getArgOperand(0);
1158 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
1160 II.getModule(), Intrinsic::sqrt, {II.getType()});
1161 II.setCalledFunction(NewDecl);
1167 case Intrinsic::amdgcn_log:
1168 case Intrinsic::amdgcn_exp2: {
1169 const bool IsLog = IID == Intrinsic::amdgcn_log;
1170 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
1171 Value *Src =
II.getArgOperand(0);
1181 if (
C->isInfinity()) {
1184 if (!
C->isNegative())
1188 if (IsExp &&
C->isNegative())
1192 if (
II.isStrictFP())
1196 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
1201 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
1203 : ConstantFP::get(Ty, 1.0);
1207 if (IsLog &&
C->isNegative())
1215 case Intrinsic::amdgcn_frexp_mant:
1216 case Intrinsic::amdgcn_frexp_exp: {
1217 Value *Src =
II.getArgOperand(0);
1223 if (IID == Intrinsic::amdgcn_frexp_mant) {
1225 II, ConstantFP::get(
II.getContext(), Significand));
1245 case Intrinsic::amdgcn_class: {
1246 Value *Src0 =
II.getArgOperand(0);
1247 Value *Src1 =
II.getArgOperand(1);
1251 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
1254 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
1275 case Intrinsic::amdgcn_cvt_pkrtz: {
1276 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
1289 return ConstantFP::get(HalfTy, Val);
1292 Value *Src =
nullptr;
1294 if (Src->getType()->isHalfTy())
1301 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
1302 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
1312 case Intrinsic::amdgcn_cvt_pknorm_i16:
1313 case Intrinsic::amdgcn_cvt_pknorm_u16:
1314 case Intrinsic::amdgcn_cvt_pk_i16:
1315 case Intrinsic::amdgcn_cvt_pk_u16: {
1316 Value *Src0 =
II.getArgOperand(0);
1317 Value *Src1 =
II.getArgOperand(1);
1329 case Intrinsic::amdgcn_cvt_off_f32_i4: {
1330 Value* Arg =
II.getArgOperand(0);
1344 constexpr size_t ResValsSize = 16;
1345 static constexpr float ResVals[ResValsSize] = {
1346 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
1347 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
1349 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
1352 case Intrinsic::amdgcn_ubfe:
1353 case Intrinsic::amdgcn_sbfe: {
1355 Value *Src =
II.getArgOperand(0);
1362 unsigned IntSize = Ty->getIntegerBitWidth();
1367 if ((Width & (IntSize - 1)) == 0) {
1372 if (Width >= IntSize) {
1374 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
1385 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
1389 bool Signed = IID == Intrinsic::amdgcn_sbfe;
1391 if (!CWidth || !COffset)
1401 if (
Offset + Width < IntSize) {
1405 RightShift->takeName(&
II);
1412 RightShift->takeName(&
II);
1415 case Intrinsic::amdgcn_exp:
1416 case Intrinsic::amdgcn_exp_row:
1417 case Intrinsic::amdgcn_exp_compr: {
1423 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
1425 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
1426 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
1427 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
1428 Value *Src =
II.getArgOperand(
I + 2);
1442 case Intrinsic::amdgcn_fmed3: {
1443 Value *Src0 =
II.getArgOperand(0);
1444 Value *Src1 =
II.getArgOperand(1);
1445 Value *Src2 =
II.getArgOperand(2);
1447 for (
Value *Src : {Src0, Src1, Src2}) {
1452 if (
II.isStrictFP())
1489 const APFloat *ConstSrc0 =
nullptr;
1490 const APFloat *ConstSrc1 =
nullptr;
1491 const APFloat *ConstSrc2 =
nullptr;
1496 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1516 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1539 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1559 CI->copyFastMathFlags(&
II);
1585 II.setArgOperand(0, Src0);
1586 II.setArgOperand(1, Src1);
1587 II.setArgOperand(2, Src2);
1597 ConstantFP::get(
II.getType(), Result));
1602 if (!ST->hasMed3_16())
1611 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1619 case Intrinsic::amdgcn_icmp:
1620 case Intrinsic::amdgcn_fcmp: {
1624 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1631 Value *Src0 =
II.getArgOperand(0);
1632 Value *Src1 =
II.getArgOperand(1);
1652 II.getType(), Args);
1653 NewCall->
addFnAttr(Attribute::Convergent);
1661 II.setArgOperand(0, Src1);
1662 II.setArgOperand(1, Src0);
1664 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1711 ? Intrinsic::amdgcn_fcmp
1712 : Intrinsic::amdgcn_icmp;
1717 unsigned Width = CmpType->getBitWidth();
1718 unsigned NewWidth = Width;
1726 else if (Width <= 32)
1728 else if (Width <= 64)
1733 if (Width != NewWidth) {
1743 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1746 Value *Args[] = {SrcLHS, SrcRHS,
1747 ConstantInt::get(CC->
getType(), SrcPred)};
1749 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1756 case Intrinsic::amdgcn_mbcnt_hi:
1761 case Intrinsic::amdgcn_mbcnt_lo: {
1774 if (std::optional<ConstantRange> ExistingRange =
II.getRange()) {
1775 ComputedRange = ComputedRange.
intersectWith(*ExistingRange);
1776 if (ComputedRange == *ExistingRange)
1780 II.addRangeRetAttr(ComputedRange);
1783 case Intrinsic::amdgcn_ballot: {
1784 Value *Arg =
II.getArgOperand(0);
1789 if (Src->isZero()) {
1794 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1801 {IC.Builder.getInt32Ty()},
1802 {II.getArgOperand(0)}),
1809 case Intrinsic::amdgcn_wavefrontsize: {
1810 if (ST->isWaveSizeKnown())
1812 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1815 case Intrinsic::amdgcn_wqm_vote: {
1822 case Intrinsic::amdgcn_kill: {
1824 if (!
C || !
C->getZExtValue())
1830 case Intrinsic::amdgcn_s_sendmsg:
1831 case Intrinsic::amdgcn_s_sendmsghalt: {
1837 Value *M0Val =
II.getArgOperand(1);
1843 decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
1845 if (!msgDoesNotUseM0(MsgId, *ST))
1849 II.dropUBImplyingAttrsAndMetadata();
1853 case Intrinsic::amdgcn_update_dpp: {
1854 Value *Old =
II.getArgOperand(0);
1859 if (BC->isNullValue() || RM->getZExtValue() != 0xF ||
1866 case Intrinsic::amdgcn_permlane16:
1867 case Intrinsic::amdgcn_permlane16_var:
1868 case Intrinsic::amdgcn_permlanex16:
1869 case Intrinsic::amdgcn_permlanex16_var: {
1871 Value *VDstIn =
II.getArgOperand(0);
1876 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1877 IID == Intrinsic::amdgcn_permlanex16)
1884 unsigned int BcIdx = FiIdx + 1;
1893 case Intrinsic::amdgcn_wave_shuffle:
1895 case Intrinsic::amdgcn_permlane64:
1896 case Intrinsic::amdgcn_readfirstlane:
1897 case Intrinsic::amdgcn_readlane:
1898 case Intrinsic::amdgcn_ds_bpermute: {
1900 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1901 const Use &Src =
II.getArgOperandUse(SrcIdx);
1905 if (IID == Intrinsic::amdgcn_readlane &&
1912 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1913 const Use &Lane =
II.getArgOperandUse(0);
1917 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1918 II.setCalledFunction(NewDecl);
1919 II.setOperand(0, Src);
1920 II.setOperand(1, NewLane);
1925 if (IID == Intrinsic::amdgcn_ds_bpermute)
1931 return std::nullopt;
1933 case Intrinsic::amdgcn_writelane: {
1937 return std::nullopt;
1939 case Intrinsic::amdgcn_trig_preop: {
1942 if (!
II.getType()->isDoubleTy())
1945 Value *Src =
II.getArgOperand(0);
1946 Value *Segment =
II.getArgOperand(1);
1955 if (StrippedSign != Src)
1958 if (
II.isStrictFP())
1980 unsigned Shift = SegmentVal * 53;
1985 static const uint32_t TwoByPi[] = {
1986 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1987 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1988 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1989 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1990 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1991 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1995 unsigned Idx = Shift >> 5;
1996 if (Idx + 2 >= std::size(TwoByPi)) {
2001 unsigned BShift = Shift & 0x1f;
2005 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
2009 int Scale = -53 - Shift;
2016 case Intrinsic::amdgcn_fmul_legacy: {
2017 Value *Op0 =
II.getArgOperand(0);
2018 Value *Op1 =
II.getArgOperand(1);
2020 for (
Value *Src : {Op0, Op1}) {
2041 case Intrinsic::amdgcn_fma_legacy: {
2042 Value *Op0 =
II.getArgOperand(0);
2043 Value *Op1 =
II.getArgOperand(1);
2044 Value *Op2 =
II.getArgOperand(2);
2046 for (
Value *Src : {Op0, Op1, Op2}) {
2068 II.getModule(), Intrinsic::fma,
II.getType()));
2073 case Intrinsic::amdgcn_is_shared:
2074 case Intrinsic::amdgcn_is_private: {
2075 Value *Src =
II.getArgOperand(0);
2085 case Intrinsic::amdgcn_make_buffer_rsrc: {
2086 Value *Src =
II.getArgOperand(0);
2089 return std::nullopt;
2091 case Intrinsic::amdgcn_raw_buffer_store_format:
2092 case Intrinsic::amdgcn_struct_buffer_store_format:
2093 case Intrinsic::amdgcn_raw_tbuffer_store:
2094 case Intrinsic::amdgcn_struct_tbuffer_store:
2095 case Intrinsic::amdgcn_image_store_1d:
2096 case Intrinsic::amdgcn_image_store_1darray:
2097 case Intrinsic::amdgcn_image_store_2d:
2098 case Intrinsic::amdgcn_image_store_2darray:
2099 case Intrinsic::amdgcn_image_store_2darraymsaa:
2100 case Intrinsic::amdgcn_image_store_2dmsaa:
2101 case Intrinsic::amdgcn_image_store_3d:
2102 case Intrinsic::amdgcn_image_store_cube:
2103 case Intrinsic::amdgcn_image_store_mip_1d:
2104 case Intrinsic::amdgcn_image_store_mip_1darray:
2105 case Intrinsic::amdgcn_image_store_mip_2d:
2106 case Intrinsic::amdgcn_image_store_mip_2darray:
2107 case Intrinsic::amdgcn_image_store_mip_3d:
2108 case Intrinsic::amdgcn_image_store_mip_cube: {
2113 if (ST->hasDefaultComponentBroadcast())
2115 else if (ST->hasDefaultComponentZero())
2120 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
2128 case Intrinsic::amdgcn_prng_b32: {
2129 auto *Src =
II.getArgOperand(0);
2133 return std::nullopt;
2135 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
2136 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
2137 Value *Src0 =
II.getArgOperand(0);
2138 Value *Src1 =
II.getArgOperand(1);
2144 auto getFormatNumRegs = [](
unsigned FormatVal) {
2145 switch (FormatVal) {
2159 bool MadeChange =
false;
2160 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
2161 unsigned Src1NumElts = getFormatNumRegs(BLGP);
2165 if (Src0Ty->getNumElements() > Src0NumElts) {
2172 if (Src1Ty->getNumElements() > Src1NumElts) {
2180 return std::nullopt;
2191 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
2192 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
2193 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
2194 Value *Src0 =
II.getArgOperand(1);
2195 Value *Src1 =
II.getArgOperand(3);
2201 bool MadeChange =
false;
2207 if (Src0Ty->getNumElements() > Src0NumElts) {
2214 if (Src1Ty->getNumElements() > Src1NumElts) {
2222 return std::nullopt;
2239 return std::nullopt;
2252 int DMaskIdx,
bool IsLoad) {
2255 :
II.getOperand(0)->getType());
2256 unsigned VWidth = IIVTy->getNumElements();
2259 Type *EltTy = IIVTy->getElementType();
2271 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
2276 DemandedElts = (1 << ActiveBits) - 1;
2278 if (UnusedComponentsAtFront > 0) {
2279 static const unsigned InvalidOffsetIdx = 0xf;
2282 switch (
II.getIntrinsicID()) {
2283 case Intrinsic::amdgcn_raw_buffer_load:
2284 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2287 case Intrinsic::amdgcn_s_buffer_load:
2291 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
2292 OffsetIdx = InvalidOffsetIdx;
2296 case Intrinsic::amdgcn_struct_buffer_load:
2297 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2302 OffsetIdx = InvalidOffsetIdx;
2306 if (OffsetIdx != InvalidOffsetIdx) {
2308 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
2309 auto *
Offset = Args[OffsetIdx];
2310 unsigned SingleComponentSizeInBits =
2312 unsigned OffsetAdd =
2313 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
2314 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
2331 unsigned NewDMaskVal = 0;
2332 unsigned OrigLdStIdx = 0;
2333 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
2334 const unsigned Bit = 1 << SrcIdx;
2335 if (!!(DMaskVal & Bit)) {
2336 if (!!DemandedElts[OrigLdStIdx])
2342 if (DMaskVal != NewDMaskVal)
2343 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
2346 unsigned NewNumElts = DemandedElts.
popcount();
2350 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
2352 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
2364 OverloadTys[0] = NewTy;
2368 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
2369 if (DemandedElts[OrigStoreIdx])
2372 if (NewNumElts == 1)
2384 if (NewNumElts == 1) {
2390 unsigned NewLoadIdx = 0;
2391 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
2392 if (!!DemandedElts[OrigLoadIdx])
2408 APInt &UndefElts)
const {
2413 const unsigned FirstElt = DemandedElts.
countr_zero();
2415 const unsigned MaskLen = LastElt - FirstElt + 1;
2417 unsigned OldNumElts = VT->getNumElements();
2418 if (MaskLen == OldNumElts && MaskLen != 1)
2421 Type *EltTy = VT->getElementType();
2429 Value *Src =
II.getArgOperand(0);
2434 II.getOperandBundlesAsDefs(OpBundles);
2451 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2452 if (DemandedElts[FirstElt +
I])
2453 ExtractMask[
I] = FirstElt +
I;
2462 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2463 if (DemandedElts[FirstElt +
I])
2464 InsertMask[FirstElt +
I] =
I;
2476 SimplifyAndSetOp)
const {
2477 switch (
II.getIntrinsicID()) {
2478 case Intrinsic::amdgcn_readfirstlane:
2479 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2481 case Intrinsic::amdgcn_raw_buffer_load:
2482 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2483 case Intrinsic::amdgcn_raw_buffer_load_format:
2484 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
2485 case Intrinsic::amdgcn_raw_tbuffer_load:
2486 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
2487 case Intrinsic::amdgcn_s_buffer_load:
2488 case Intrinsic::amdgcn_struct_buffer_load:
2489 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2490 case Intrinsic::amdgcn_struct_buffer_load_format:
2491 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
2492 case Intrinsic::amdgcn_struct_tbuffer_load:
2493 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2496 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2502 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Value * createPermlane16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlane16 with the precomputed lane-select halves.
static std::optional< unsigned > matchRowSharePattern(ArrayRef< uint8_t > Ids)
Match a row-share pattern: all 16 lanes of each row read the same source lane.
static bool matchMirrorPattern(ArrayRef< uint8_t > Ids)
Match an N-lane reversal (mirror) pattern.
static bool tryBuildShuffleMap(Value *Index, const GCNSubtarget &ST, SmallVectorImpl< uint8_t > &Ids, const DataLayout &DL)
Build the per-lane shuffle map by evaluating Index for every lane in the wave.
static std::optional< unsigned > matchQuadPermPattern(ArrayRef< uint8_t > Ids)
Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids...
static std::optional< unsigned > matchHalfRowPermPattern(ArrayRef< uint8_t > Ids)
Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per ...
static std::optional< unsigned > matchRowXMaskPattern(ArrayRef< uint8_t > Ids)
Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1,...
static constexpr auto matchHalfRowMirrorPattern
static Value * createPermlaneX16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlanex16 with the precomputed lane-select halves.
static bool isRowPattern(ArrayRef< uint8_t > Ids)
Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row,...
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static constexpr auto isFullRowPattern
static constexpr auto isQuadPattern
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static uint64_t computePermlane16Masks(ArrayRef< uint8_t > Ids)
Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4...
static bool matchHalfWaveSwapPattern(ArrayRef< uint8_t > Ids)
Match a half-wave swap: lane J reads from lane J ^ 32.
static bool hasPeriodicLayout(ArrayRef< uint8_t > Ids)
Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = I...
static std::optional< Instruction * > tryOptimizeShufflePattern(InstCombiner &IC, IntrinsicInst &II, const GCNSubtarget &ST)
Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a ...
static constexpr auto isHalfRowPattern
static APInt defaultComponentBroadcast(Value *V)
static std::optional< unsigned > matchDsSwizzleBitmaskPattern(ArrayRef< uint8_t > Ids)
Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask ...
static Value * createDsSwizzle(IRBuilderBase &B, Value *Val, unsigned Offset, const DataLayout &DL)
Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 a...
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static Value * matchShuffleToHWIntrinsic(IRBuilderBase &B, Value *Src, ArrayRef< uint8_t > Ids, const GCNSubtarget &ST, const DataLayout &DL)
Given a shuffle map, try to emit the best hardware intrinsic.
static std::optional< unsigned > matchRowRotatePattern(ArrayRef< uint8_t > Ids)
Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].
static bool isCrossRowPattern(ArrayRef< uint8_t > Ids)
Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads fr...
static bool isThreadID(const GCNSubtarget &ST, Value *V)
static Value * createUpdateDpp(IRBuilderBase &B, Value *Val, unsigned Ctrl)
Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds...
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
static Value * createMovDpp8(IRBuilderBase &B, Value *Val, unsigned Selector)
Emit v_mov_b32_dpp8 with the given 24-bit lane selector.
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
static constexpr auto matchFullRowMirrorPattern
static std::optional< unsigned > evalLaneExpr(Value *V, unsigned Lane, const GCNSubtarget &ST, const DataLayout &DL, unsigned Depth=0)
Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a c...
static Value * createPermlane64(IRBuilderBase &B, Value *Val)
Emit v_permlane64 (swap of the two 32-lane halves of a wave64).
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
Provides some synthesis utilities to produce sequences of values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
size_t size() const
Get the array size.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Tagged union holding either a T or a Error.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximumnum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
static Value * stripSignOnlyFPOps(Value *Val)
Ignore all operations which only change the sign of a value, returning the underlying magnitude value...
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignores it.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
auto dyn_cast_or_null(const Y &Val)
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
constexpr unsigned MaxAnalysisRecursionDepth
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.