28#include "llvm/IR/IntrinsicsAMDGPU.h"
36#define DEBUG_TYPE "AMDGPUtti"
40struct AMDGPUImageDMaskIntrinsic {
44#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
45#include "AMDGPUGenSearchableTables.inc"
56 "nans handled separately");
73 Type *VTy = V.getType();
82 APFloat FloatValue(ConstFloat->getValueAPF());
83 bool LosesInfo =
true;
92 APInt IntValue(ConstInt->getValue());
111 Type *VTy = V.getType();
137 Func(Args, OverloadTys);
152 bool RemoveOldIntr = &OldIntr != &InstToReplace;
161static std::optional<Instruction *>
166 if (
const auto *LZMappingInfo =
168 if (
auto *ConstantLod =
170 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
175 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
176 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
183 if (
const auto *MIPMappingInfo =
185 if (
auto *ConstantMip =
187 if (ConstantMip->isZero()) {
192 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
193 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
200 if (
const auto *BiasMappingInfo =
202 if (
auto *ConstantBias =
204 if (ConstantBias->isZero()) {
209 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
210 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
211 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
218 if (
const auto *OffsetMappingInfo =
220 if (
auto *ConstantOffset =
222 if (ConstantOffset->isZero()) {
225 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
227 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
228 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
235 if (ST->hasD16Images()) {
245 if (
II.hasOneUse()) {
248 if (
User->getOpcode() == Instruction::FPTrunc &&
252 [&](
auto &Args,
auto &ArgTys) {
255 ArgTys[0] = User->getType();
264 bool AllHalfExtracts =
true;
266 for (
User *U :
II.users()) {
268 if (!Ext || !Ext->hasOneUse()) {
269 AllHalfExtracts =
false;
274 if (!Tr || !Tr->getType()->isHalfTy()) {
275 AllHalfExtracts =
false;
282 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
293 OverloadTys[0] = HalfVecTy;
296 M, ImageDimIntr->
Intr, OverloadTys);
298 II.mutateType(HalfVecTy);
299 II.setCalledFunction(HalfDecl);
302 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
303 Value *Idx = Ext->getIndexOperand();
305 Builder.SetInsertPoint(Tr);
307 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
310 Tr->replaceAllUsesWith(HalfExtract);
313 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
324 if (!ST->hasA16() && !ST->hasG16())
331 bool FloatCoord =
false;
333 bool OnlyDerivatives =
false;
336 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
337 Value *Coord =
II.getOperand(OperandIndex);
340 if (OperandIndex < ImageDimIntr->CoordStart ||
345 OnlyDerivatives =
true;
354 if (!OnlyDerivatives && !ST->hasA16())
355 OnlyDerivatives =
true;
358 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
361 "Only image instructions with a sampler can have a bias");
363 OnlyDerivatives =
true;
366 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
374 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
375 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
376 if (!OnlyDerivatives) {
377 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
380 if (ImageDimIntr->NumBiasArgs != 0)
381 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
387 OperandIndex < EndIndex; OperandIndex++) {
389 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
394 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
395 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
424 Value *Src =
nullptr;
427 if (Src->getType()->isHalfTy())
444 unsigned VWidth = VTy->getNumElements();
447 for (
int i = VWidth - 1; i > 0; --i) {
469 unsigned VWidth = VTy->getNumElements();
475 SVI->getShuffleMask(ShuffleMask);
477 for (
int I = VWidth - 1;
I > 0; --
I) {
478 if (ShuffleMask.empty()) {
529 unsigned LaneArgIdx)
const {
530 unsigned MaskBits = ST->getWavefrontSizeLog2();
544 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
547 if (MaskedConst != LaneArg) {
548 II.getOperandUse(LaneArgIdx).set(MaskedConst);
560 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
576 if (ST.isWave32() &&
match(V, W32Pred))
578 if (ST.isWave64() &&
match(V, W64Pred))
587 const auto IID =
II.getIntrinsicID();
588 assert(IID == Intrinsic::amdgcn_readlane ||
589 IID == Intrinsic::amdgcn_readfirstlane ||
590 IID == Intrinsic::amdgcn_permlane64);
600 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
604 Value *LaneID =
nullptr;
606 LaneID =
II.getOperand(1);
620 const auto DoIt = [&](
unsigned OpIdx,
624 Ops.push_back(LaneID);
640 return DoIt(0,
II.getCalledFunction());
644 Type *SrcTy = Src->getType();
650 return DoIt(0, Remangled);
658 return DoIt(1,
II.getCalledFunction());
660 return DoIt(0,
II.getCalledFunction());
671 unsigned Depth = 0) {
681 return CI->getZExtValue();
690 std::optional<unsigned>
LHS =
694 std::optional<unsigned>
RHS =
703 return CI ? std::optional<unsigned>(CI->getZExtValue()) : std::nullopt;
711 unsigned WaveSize = ST.getWavefrontSize();
713 for (
unsigned Lane :
seq(WaveSize)) {
715 if (!Val || *Val >= WaveSize)
724template <
unsigned Period>
726 static_assert(
isPowerOf2_32(Period),
"Period must be a power of two");
727 for (
unsigned I = Period,
E = Ids.
size();
I <
E; ++
I)
728 if (Ids[
I] != Ids[
I % Period] + (
I & ~(Period - 1)))
736 for (
unsigned I = 0;
I <
N; ++
I)
752 return Ids[3] << 6 | Ids[2] << 4 | Ids[1] << 2 | Ids[0];
759 for (
unsigned J = 0; J <
N; ++J)
760 if (Ids[J] != (
N - 1) - J)
772 for (
unsigned J = 1; J < 16; ++J)
773 if (Ids[J] != (Ids[0] + J) % 16)
791 unsigned Mask = Ids[0];
794 for (
unsigned J = 0; J < 16; ++J)
795 if (Ids[J] != (Mask ^ J))
805 unsigned Selector = 0;
806 for (
unsigned J = 0; J < 8; ++J)
807 Selector |= Ids[J] << (J * 3);
816 for (
unsigned J = 0; J < 16; ++J)
817 Sel |=
static_cast<uint64_t>(Ids[J] & 0xF) << (J * 4);
824 if (Ids.
size() != 64)
826 for (
unsigned J = 0; J < 64; ++J)
827 if (Ids[J] != (J ^ 32))
838 for (
unsigned J = 0; J < 16; ++J) {
839 if (Ids[J] < 16 || Ids[J] >= 32)
841 if (Ids[J + 16] != Ids[J] - 16)
852static std::optional<unsigned>
861 unsigned AndMask = 0, OrMask = 0, XorMask = 0;
862 for (
unsigned B = 0;
B < 5; ++
B) {
863 unsigned Bit0 = (Ids[0] >>
B) & 1;
864 unsigned Bit1 = (Ids[1u <<
B] >>
B) & 1;
867 XorMask |= Bit0 <<
B;
875 for (
unsigned I :
seq(32u)) {
876 unsigned Expected = ((
I & AndMask) | OrMask) ^ XorMask;
891static std::optional<unsigned>
902 for (
unsigned I = 0;
I < 32; ++
I)
903 if (Ids[
I] != (
I +
N) % 32)
915 return B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, {Ty},
917 B.getInt32(0xF),
B.getInt32(0xF),
B.getTrue()});
922 return B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp8, {Val->
getType()},
923 {Val,
B.getInt32(Selector)});
930 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane16, {Ty},
932 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
940 return B.CreateIntrinsic(Intrinsic::amdgcn_permlanex16, {Ty},
942 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
950 assert(
DL.getTypeSizeInBits(OrigTy) == 32 &&
951 "ds_swizzle only supports 32-bit operands");
955 Src =
B.CreatePtrToInt(Src, I32Ty);
956 else if (OrigTy != I32Ty)
957 Src =
B.CreateBitCast(Src, I32Ty);
958 Value *Result =
B.CreateIntrinsic(Intrinsic::amdgcn_ds_swizzle, {},
961 return B.CreateIntToPtr(Result, OrigTy);
963 return B.CreateBitCast(Result, OrigTy);
969 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {Val->
getType()},
999 if (ST.hasDPPRowShare()) {
1004 if (ST.hasDPP() && ST.hasGFX10Insts()) {
1014 if (ST.hasPermlane16Insts()) {
1034 if (ST.hasDsSwizzleRotateMode()) {
1047static std::optional<Instruction *>
1051 if (
DL.getTypeSizeInBits(
II.getType()) != 32)
1052 return std::nullopt;
1054 if (!ST.isWaveSizeKnown())
1055 return std::nullopt;
1057 unsigned WaveSize = ST.getWavefrontSize();
1058 bool IsBpermute =
II.getIntrinsicID() == Intrinsic::amdgcn_ds_bpermute;
1059 Value *Src =
II.getArgOperand(IsBpermute ? 1 : 0);
1060 Value *Index =
II.getArgOperand(IsBpermute ? 0 : 1);
1065 for (
unsigned Lane :
seq(WaveSize)) {
1067 if (!Val || (*Val & 3) || (*Val >> 2) >= WaveSize)
1068 return std::nullopt;
1069 Ids[Lane] = *Val >> 2;
1073 return std::nullopt;
1078 return std::nullopt;
1082std::optional<Instruction *>
1086 case Intrinsic::amdgcn_implicitarg_ptr: {
1087 if (
II.getFunction()->hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
1089 uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*
II.getFunction());
1092 II.getAttributes().getRetDereferenceableOrNullBytes();
1093 if (CurrentOrNullBytes != 0) {
1096 uint64_t NewBytes = std::max(CurrentOrNullBytes, ImplicitArgBytes);
1099 II.removeRetAttr(Attribute::DereferenceableOrNull);
1103 uint64_t CurrentBytes =
II.getAttributes().getRetDereferenceableBytes();
1104 uint64_t NewBytes = std::max(CurrentBytes, ImplicitArgBytes);
1105 if (NewBytes != CurrentBytes) {
1111 return std::nullopt;
1113 case Intrinsic::amdgcn_rcp: {
1114 Value *Src =
II.getArgOperand(0);
1125 if (
II.isStrictFP())
1129 const APFloat &ArgVal =
C->getValueAPF();
1147 auto IID = SrcCI->getIntrinsicID();
1152 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
1162 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
1165 II.setFastMathFlags(InnerFMF);
1167 II.setCalledFunction(NewDecl);
1173 case Intrinsic::amdgcn_sqrt:
1174 case Intrinsic::amdgcn_rsq:
1175 case Intrinsic::amdgcn_tanh: {
1176 Value *Src =
II.getArgOperand(0);
1188 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
1190 II.getModule(), Intrinsic::sqrt, {II.getType()});
1191 II.setCalledFunction(NewDecl);
1197 case Intrinsic::amdgcn_log:
1198 case Intrinsic::amdgcn_exp2: {
1199 const bool IsLog = IID == Intrinsic::amdgcn_log;
1200 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
1201 Value *Src =
II.getArgOperand(0);
1211 if (
C->isInfinity()) {
1214 if (!
C->isNegative())
1218 if (IsExp &&
C->isNegative())
1222 if (
II.isStrictFP())
1226 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
1231 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
1233 : ConstantFP::get(Ty, 1.0);
1237 if (IsLog &&
C->isNegative())
1245 case Intrinsic::amdgcn_frexp_mant:
1246 case Intrinsic::amdgcn_frexp_exp: {
1247 Value *Src =
II.getArgOperand(0);
1253 if (IID == Intrinsic::amdgcn_frexp_mant) {
1255 II, ConstantFP::get(
II.getContext(), Significand));
1275 case Intrinsic::amdgcn_class: {
1276 Value *Src0 =
II.getArgOperand(0);
1277 Value *Src1 =
II.getArgOperand(1);
1281 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
1284 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
1305 case Intrinsic::amdgcn_cvt_pkrtz: {
1306 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
1319 return ConstantFP::get(HalfTy, Val);
1322 Value *Src =
nullptr;
1324 if (Src->getType()->isHalfTy())
1331 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
1332 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
1342 case Intrinsic::amdgcn_cvt_pknorm_i16:
1343 case Intrinsic::amdgcn_cvt_pknorm_u16:
1344 case Intrinsic::amdgcn_cvt_pk_i16:
1345 case Intrinsic::amdgcn_cvt_pk_u16: {
1346 Value *Src0 =
II.getArgOperand(0);
1347 Value *Src1 =
II.getArgOperand(1);
1359 case Intrinsic::amdgcn_cvt_off_f32_i4: {
1360 Value* Arg =
II.getArgOperand(0);
1374 constexpr size_t ResValsSize = 16;
1375 static constexpr float ResVals[ResValsSize] = {
1376 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
1377 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
1379 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
1382 case Intrinsic::amdgcn_ubfe:
1383 case Intrinsic::amdgcn_sbfe: {
1385 Value *Src =
II.getArgOperand(0);
1392 unsigned IntSize = Ty->getIntegerBitWidth();
1397 if ((Width & (IntSize - 1)) == 0) {
1402 if (Width >= IntSize) {
1404 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
1415 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
1419 bool Signed = IID == Intrinsic::amdgcn_sbfe;
1421 if (!CWidth || !COffset)
1431 if (
Offset + Width < IntSize) {
1435 RightShift->takeName(&
II);
1442 RightShift->takeName(&
II);
1445 case Intrinsic::amdgcn_exp:
1446 case Intrinsic::amdgcn_exp_row:
1447 case Intrinsic::amdgcn_exp_compr: {
1453 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
1455 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
1456 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
1457 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
1458 Value *Src =
II.getArgOperand(
I + 2);
1472 case Intrinsic::amdgcn_fmed3: {
1473 Value *Src0 =
II.getArgOperand(0);
1474 Value *Src1 =
II.getArgOperand(1);
1475 Value *Src2 =
II.getArgOperand(2);
1477 for (
Value *Src : {Src0, Src1, Src2}) {
1482 if (
II.isStrictFP())
1519 const APFloat *ConstSrc0 =
nullptr;
1520 const APFloat *ConstSrc1 =
nullptr;
1521 const APFloat *ConstSrc2 =
nullptr;
1526 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1546 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1569 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1589 CI->copyFastMathFlags(&
II);
1615 II.setArgOperand(0, Src0);
1616 II.setArgOperand(1, Src1);
1617 II.setArgOperand(2, Src2);
1627 ConstantFP::get(
II.getType(), Result));
1632 if (!ST->hasMed3_16())
1641 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1649 case Intrinsic::amdgcn_icmp:
1650 case Intrinsic::amdgcn_fcmp: {
1654 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1661 Value *Src0 =
II.getArgOperand(0);
1662 Value *Src1 =
II.getArgOperand(1);
1689 II.setArgOperand(0, Src1);
1690 II.setArgOperand(1, Src0);
1692 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1739 ? Intrinsic::amdgcn_fcmp
1740 : Intrinsic::amdgcn_icmp;
1745 unsigned Width = CmpType->getBitWidth();
1746 unsigned NewWidth = Width;
1754 else if (Width <= 32)
1756 else if (Width <= 64)
1761 if (Width != NewWidth) {
1771 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1774 Value *Args[] = {SrcLHS, SrcRHS,
1775 ConstantInt::get(CC->
getType(), SrcPred)};
1777 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1784 case Intrinsic::amdgcn_mbcnt_hi:
1789 case Intrinsic::amdgcn_mbcnt_lo: {
1802 if (std::optional<ConstantRange> ExistingRange =
II.getRange()) {
1803 ComputedRange = ComputedRange.
intersectWith(*ExistingRange);
1804 if (ComputedRange == *ExistingRange)
1808 II.addRangeRetAttr(ComputedRange);
1811 case Intrinsic::amdgcn_ballot: {
1812 Value *Arg =
II.getArgOperand(0);
1817 if (Src->isZero()) {
1822 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1829 {IC.Builder.getInt32Ty()},
1830 {II.getArgOperand(0)}),
1837 case Intrinsic::amdgcn_wavefrontsize: {
1838 if (ST->isWaveSizeKnown())
1840 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1843 case Intrinsic::amdgcn_wqm_vote: {
1850 case Intrinsic::amdgcn_kill: {
1852 if (!
C || !
C->getZExtValue())
1858 case Intrinsic::amdgcn_s_sendmsg:
1859 case Intrinsic::amdgcn_s_sendmsghalt: {
1865 Value *M0Val =
II.getArgOperand(1);
1871 decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
1873 if (!msgDoesNotUseM0(MsgId, *ST))
1877 II.dropUBImplyingAttrsAndMetadata();
1881 case Intrinsic::amdgcn_update_dpp: {
1882 Value *Old =
II.getArgOperand(0);
1887 if (BC->isNullValue() || RM->getZExtValue() != 0xF ||
1894 case Intrinsic::amdgcn_permlane16:
1895 case Intrinsic::amdgcn_permlane16_var:
1896 case Intrinsic::amdgcn_permlanex16:
1897 case Intrinsic::amdgcn_permlanex16_var: {
1899 Value *VDstIn =
II.getArgOperand(0);
1904 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1905 IID == Intrinsic::amdgcn_permlanex16)
1912 unsigned int BcIdx = FiIdx + 1;
1921 case Intrinsic::amdgcn_wave_shuffle:
1923 case Intrinsic::amdgcn_permlane64:
1924 case Intrinsic::amdgcn_readfirstlane:
1925 case Intrinsic::amdgcn_readlane:
1926 case Intrinsic::amdgcn_ds_bpermute: {
1928 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1929 const Use &Src =
II.getArgOperandUse(SrcIdx);
1933 if (IID == Intrinsic::amdgcn_readlane &&
1940 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1941 const Use &Lane =
II.getArgOperandUse(0);
1945 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1946 II.setCalledFunction(NewDecl);
1947 II.setOperand(0, Src);
1948 II.setOperand(1, NewLane);
1953 if (IID == Intrinsic::amdgcn_ds_bpermute)
1959 return std::nullopt;
1961 case Intrinsic::amdgcn_writelane: {
1965 return std::nullopt;
1967 case Intrinsic::amdgcn_trig_preop: {
1970 if (!
II.getType()->isDoubleTy())
1973 Value *Src =
II.getArgOperand(0);
1974 Value *Segment =
II.getArgOperand(1);
1983 if (StrippedSign != Src)
1986 if (
II.isStrictFP())
2008 unsigned Shift = SegmentVal * 53;
2013 static const uint32_t TwoByPi[] = {
2014 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
2015 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
2016 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
2017 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
2018 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
2019 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
2023 unsigned Idx = Shift >> 5;
2024 if (Idx + 2 >= std::size(TwoByPi)) {
2029 unsigned BShift = Shift & 0x1f;
2033 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
2037 int Scale = -53 - Shift;
2044 case Intrinsic::amdgcn_fmul_legacy: {
2045 Value *Op0 =
II.getArgOperand(0);
2046 Value *Op1 =
II.getArgOperand(1);
2048 for (
Value *Src : {Op0, Op1}) {
2069 case Intrinsic::amdgcn_fma_legacy: {
2070 Value *Op0 =
II.getArgOperand(0);
2071 Value *Op1 =
II.getArgOperand(1);
2072 Value *Op2 =
II.getArgOperand(2);
2074 for (
Value *Src : {Op0, Op1, Op2}) {
2096 II.getModule(), Intrinsic::fma,
II.getType()));
2101 case Intrinsic::amdgcn_is_shared:
2102 case Intrinsic::amdgcn_is_private: {
2103 Value *Src =
II.getArgOperand(0);
2113 case Intrinsic::amdgcn_make_buffer_rsrc: {
2114 Value *Src =
II.getArgOperand(0);
2117 return std::nullopt;
2119 case Intrinsic::amdgcn_raw_buffer_store_format:
2120 case Intrinsic::amdgcn_struct_buffer_store_format:
2121 case Intrinsic::amdgcn_raw_tbuffer_store:
2122 case Intrinsic::amdgcn_struct_tbuffer_store:
2123 case Intrinsic::amdgcn_image_store_1d:
2124 case Intrinsic::amdgcn_image_store_1darray:
2125 case Intrinsic::amdgcn_image_store_2d:
2126 case Intrinsic::amdgcn_image_store_2darray:
2127 case Intrinsic::amdgcn_image_store_2darraymsaa:
2128 case Intrinsic::amdgcn_image_store_2dmsaa:
2129 case Intrinsic::amdgcn_image_store_3d:
2130 case Intrinsic::amdgcn_image_store_cube:
2131 case Intrinsic::amdgcn_image_store_mip_1d:
2132 case Intrinsic::amdgcn_image_store_mip_1darray:
2133 case Intrinsic::amdgcn_image_store_mip_2d:
2134 case Intrinsic::amdgcn_image_store_mip_2darray:
2135 case Intrinsic::amdgcn_image_store_mip_3d:
2136 case Intrinsic::amdgcn_image_store_mip_cube: {
2141 if (ST->hasDefaultComponentBroadcast())
2143 else if (ST->hasDefaultComponentZero())
2148 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
2156 case Intrinsic::amdgcn_prng_b32: {
2157 auto *Src =
II.getArgOperand(0);
2161 return std::nullopt;
2163 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
2164 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
2165 Value *Src0 =
II.getArgOperand(0);
2166 Value *Src1 =
II.getArgOperand(1);
2172 auto getFormatNumRegs = [](
unsigned FormatVal) {
2173 switch (FormatVal) {
2187 bool MadeChange =
false;
2188 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
2189 unsigned Src1NumElts = getFormatNumRegs(BLGP);
2193 if (Src0Ty->getNumElements() > Src0NumElts) {
2200 if (Src1Ty->getNumElements() > Src1NumElts) {
2208 return std::nullopt;
2219 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
2220 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
2221 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
2222 Value *Src0 =
II.getArgOperand(1);
2223 Value *Src1 =
II.getArgOperand(3);
2229 bool MadeChange =
false;
2235 if (Src0Ty->getNumElements() > Src0NumElts) {
2242 if (Src1Ty->getNumElements() > Src1NumElts) {
2250 return std::nullopt;
2267 return std::nullopt;
2280 int DMaskIdx,
bool IsLoad) {
2283 :
II.getOperand(0)->getType());
2284 unsigned VWidth = IIVTy->getNumElements();
2287 Type *EltTy = IIVTy->getElementType();
2299 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
2304 DemandedElts = (1 << ActiveBits) - 1;
2306 if (UnusedComponentsAtFront > 0) {
2307 static const unsigned InvalidOffsetIdx = 0xf;
2310 switch (
II.getIntrinsicID()) {
2311 case Intrinsic::amdgcn_raw_buffer_load:
2312 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2315 case Intrinsic::amdgcn_s_buffer_load:
2319 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
2320 OffsetIdx = InvalidOffsetIdx;
2324 case Intrinsic::amdgcn_struct_buffer_load:
2325 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2330 OffsetIdx = InvalidOffsetIdx;
2334 if (OffsetIdx != InvalidOffsetIdx) {
2336 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
2337 auto *
Offset = Args[OffsetIdx];
2338 unsigned SingleComponentSizeInBits =
2340 unsigned OffsetAdd =
2341 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
2342 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
2359 unsigned NewDMaskVal = 0;
2360 unsigned OrigLdStIdx = 0;
2361 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
2362 const unsigned Bit = 1 << SrcIdx;
2363 if (!!(DMaskVal & Bit)) {
2364 if (!!DemandedElts[OrigLdStIdx])
2370 if (DMaskVal != NewDMaskVal)
2371 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
2374 unsigned NewNumElts = DemandedElts.
popcount();
2378 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
2380 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
2392 OverloadTys[0] = NewTy;
2396 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
2397 if (DemandedElts[OrigStoreIdx])
2400 if (NewNumElts == 1)
2410 AttributeList OldAttrList =
II.getAttributes();
2414 if (NewNumElts == 1) {
2420 unsigned NewLoadIdx = 0;
2421 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
2422 if (!!DemandedElts[OrigLoadIdx])
2438 APInt &UndefElts)
const {
2443 const unsigned FirstElt = DemandedElts.
countr_zero();
2445 const unsigned MaskLen = LastElt - FirstElt + 1;
2447 unsigned OldNumElts = VT->getNumElements();
2448 if (MaskLen == OldNumElts && MaskLen != 1)
2451 Type *EltTy = VT->getElementType();
2459 Value *Src =
II.getArgOperand(0);
2464 II.getOperandBundlesAsDefs(OpBundles);
2481 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2482 if (DemandedElts[FirstElt +
I])
2483 ExtractMask[
I] = FirstElt +
I;
2492 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2493 if (DemandedElts[FirstElt +
I])
2494 InsertMask[FirstElt +
I] =
I;
2506 SimplifyAndSetOp)
const {
2507 switch (
II.getIntrinsicID()) {
2508 case Intrinsic::amdgcn_readfirstlane:
2509 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2511 case Intrinsic::amdgcn_raw_buffer_load:
2512 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2513 case Intrinsic::amdgcn_raw_buffer_load_format:
2514 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
2515 case Intrinsic::amdgcn_raw_tbuffer_load:
2516 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
2517 case Intrinsic::amdgcn_s_buffer_load:
2518 case Intrinsic::amdgcn_struct_buffer_load:
2519 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2520 case Intrinsic::amdgcn_struct_buffer_load_format:
2521 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
2522 case Intrinsic::amdgcn_struct_tbuffer_load:
2523 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2526 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2532 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Value * createPermlane16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlane16 with the precomputed lane-select halves.
static std::optional< unsigned > matchRowSharePattern(ArrayRef< uint8_t > Ids)
Match a row-share pattern: all 16 lanes of each row read the same source lane.
static bool matchMirrorPattern(ArrayRef< uint8_t > Ids)
Match an N-lane reversal (mirror) pattern.
static bool tryBuildShuffleMap(Value *Index, const GCNSubtarget &ST, SmallVectorImpl< uint8_t > &Ids, const DataLayout &DL)
Build the per-lane shuffle map by evaluating Index for every lane in the wave.
static std::optional< unsigned > matchQuadPermPattern(ArrayRef< uint8_t > Ids)
Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids...
static std::optional< unsigned > matchDsSwizzleRotatePattern(ArrayRef< uint8_t > Ids)
Match a GFX9+ DS_SWIZZLE rotate-mode permutation: a cyclic left-rotation of all 32 lanes within each ...
static std::optional< unsigned > matchHalfRowPermPattern(ArrayRef< uint8_t > Ids)
Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per ...
static std::optional< unsigned > matchRowXMaskPattern(ArrayRef< uint8_t > Ids)
Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1,...
static constexpr auto matchHalfRowMirrorPattern
static Value * createPermlaneX16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlanex16 with the precomputed lane-select halves.
static bool isRowPattern(ArrayRef< uint8_t > Ids)
Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row,...
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static constexpr auto isFullRowPattern
static constexpr auto isQuadPattern
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static uint64_t computePermlane16Masks(ArrayRef< uint8_t > Ids)
Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4...
static bool matchHalfWaveSwapPattern(ArrayRef< uint8_t > Ids)
Match a half-wave swap: lane J reads from lane J ^ 32.
static bool hasPeriodicLayout(ArrayRef< uint8_t > Ids)
Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = I...
static std::optional< Instruction * > tryOptimizeShufflePattern(InstCombiner &IC, IntrinsicInst &II, const GCNSubtarget &ST)
Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a ...
static constexpr auto isHalfRowPattern
static APInt defaultComponentBroadcast(Value *V)
static std::optional< unsigned > matchDsSwizzleBitmaskPattern(ArrayRef< uint8_t > Ids)
Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask ...
static Value * createDsSwizzle(IRBuilderBase &B, Value *Val, unsigned Offset, const DataLayout &DL)
Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 a...
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static Value * matchShuffleToHWIntrinsic(IRBuilderBase &B, Value *Src, ArrayRef< uint8_t > Ids, const GCNSubtarget &ST, const DataLayout &DL)
Given a shuffle map, try to emit the best hardware intrinsic.
static std::optional< unsigned > matchRowRotatePattern(ArrayRef< uint8_t > Ids)
Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].
static bool isCrossRowPattern(ArrayRef< uint8_t > Ids)
Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads fr...
static bool isThreadID(const GCNSubtarget &ST, Value *V)
static Value * createUpdateDpp(IRBuilderBase &B, Value *Val, unsigned Ctrl)
Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds...
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
static Value * createMovDpp8(IRBuilderBase &B, Value *Val, unsigned Selector)
Emit v_mov_b32_dpp8 with the given 24-bit lane selector.
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
static constexpr auto matchFullRowMirrorPattern
static std::optional< unsigned > evalLaneExpr(Value *V, unsigned Lane, const GCNSubtarget &ST, const DataLayout &DL, unsigned Depth=0)
Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a c...
static Value * createPermlane64(IRBuilderBase &B, Value *Val)
Emit v_permlane64 (swap of the two 32-lane halves of a wave64).
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
Provides some synthesis utilities to produce sequences of values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool bitwiseIsEqual(const APFloat &RHS) const
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
size_t size() const
Get the array size.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
void setAttributes(AttributeList A)
Set the attributes for this call.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
This is an important base class in LLVM.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Tagged union holding either a T or a Error.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
ConstantInt * getTrue()
Get the constant value for i1 true.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximumnum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
static Value * stripSignOnlyFPOps(Value *Val)
Ignore all operations which only change the sign of a value, returning the underlying magnitude value...
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
auto dyn_cast_or_null(const Y &Val)
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
constexpr unsigned MaxAnalysisRecursionDepth
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.