28#include "llvm/IR/IntrinsicsAMDGPU.h"
36#define DEBUG_TYPE "AMDGPUtti"
40struct AMDGPUImageDMaskIntrinsic {
44#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
45#include "AMDGPUGenSearchableTables.inc"
56 "nans handled separately");
74 bool AllowI16SExt =
false) {
75 Type *VTy = V.getType();
84 APFloat FloatValue(ConstFloat->getValueAPF());
85 bool LosesInfo =
true;
94 APInt IntValue(ConstInt->getValue());
102 if (!IsExt && !IsFloat && AllowI16SExt)
115 Type *VTy = V.getType();
141 Func(Args, OverloadTys);
157 bool RemoveOldIntr = &OldIntr != &InstToReplace;
166static std::optional<Instruction *>
171 if (
const auto *LZMappingInfo =
173 if (
auto *ConstantLod =
175 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
180 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
181 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
188 if (
const auto *MIPMappingInfo =
190 if (
auto *ConstantMip =
192 if (ConstantMip->isZero()) {
197 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
198 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
205 if (
const auto *BiasMappingInfo =
207 if (
auto *ConstantBias =
209 if (ConstantBias->isZero()) {
214 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
215 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
216 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
223 if (
const auto *OffsetMappingInfo =
225 if (
auto *ConstantOffset =
227 if (ConstantOffset->isZero()) {
230 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
232 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
233 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
240 if (ST->hasD16Images()) {
250 if (
II.hasOneUse()) {
253 if (
User->getOpcode() == Instruction::FPTrunc &&
257 [&](
auto &Args,
auto &ArgTys) {
260 ArgTys[0] = User->getType();
269 bool AllHalfExtracts =
true;
271 for (
User *U :
II.users()) {
273 if (!Ext || !Ext->hasOneUse()) {
274 AllHalfExtracts =
false;
279 if (!Tr || !Tr->getType()->isHalfTy()) {
280 AllHalfExtracts =
false;
287 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
298 OverloadTys[0] = HalfVecTy;
301 M, ImageDimIntr->
Intr, OverloadTys);
303 II.mutateType(HalfVecTy);
304 II.setCalledFunction(HalfDecl);
307 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
308 Value *Idx = Ext->getIndexOperand();
310 Builder.SetInsertPoint(Tr);
312 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
315 Tr->replaceAllUsesWith(HalfExtract);
318 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
329 if (!ST->hasA16() && !ST->hasG16())
336 bool FloatCoord =
false;
338 bool OnlyDerivatives =
false;
343 bool AllowI16SExt = !HasSampler;
346 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
347 Value *Coord =
II.getOperand(OperandIndex);
350 if (OperandIndex < ImageDimIntr->CoordStart ||
355 OnlyDerivatives =
true;
364 if (!OnlyDerivatives && !ST->hasA16())
365 OnlyDerivatives =
true;
368 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
371 "Only image instructions with a sampler can have a bias");
373 OnlyDerivatives =
true;
376 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
384 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
385 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
386 if (!OnlyDerivatives) {
387 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
390 if (ImageDimIntr->NumBiasArgs != 0)
391 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
397 OperandIndex < EndIndex; OperandIndex++) {
399 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
404 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
405 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
434 Value *Src =
nullptr;
437 if (Src->getType()->isHalfTy())
454 unsigned VWidth = VTy->getNumElements();
457 for (
int i = VWidth - 1; i > 0; --i) {
479 unsigned VWidth = VTy->getNumElements();
485 SVI->getShuffleMask(ShuffleMask);
487 for (
int I = VWidth - 1;
I > 0; --
I) {
488 if (ShuffleMask.empty()) {
539 unsigned LaneArgIdx)
const {
540 unsigned MaskBits = ST->getWavefrontSizeLog2();
554 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
557 if (MaskedConst != LaneArg) {
558 II.getOperandUse(LaneArgIdx).set(MaskedConst);
570 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
586 if (ST.isWave32() &&
match(V, W32Pred))
588 if (ST.isWave64() &&
match(V, W64Pred))
597 const auto IID =
II.getIntrinsicID();
598 assert(IID == Intrinsic::amdgcn_readlane ||
599 IID == Intrinsic::amdgcn_readfirstlane ||
600 IID == Intrinsic::amdgcn_permlane64);
610 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
614 Value *LaneID =
nullptr;
616 LaneID =
II.getOperand(1);
630 const auto DoIt = [&](
unsigned OpIdx,
634 Ops.push_back(LaneID);
650 return DoIt(0,
II.getCalledFunction());
654 Type *SrcTy = Src->getType();
660 return DoIt(0, Remangled);
668 return DoIt(1,
II.getCalledFunction());
670 return DoIt(0,
II.getCalledFunction());
681 unsigned Depth = 0) {
691 return CI->getZExtValue();
700 std::optional<unsigned>
LHS =
704 std::optional<unsigned>
RHS =
713 return CI ? std::optional<unsigned>(CI->getZExtValue()) : std::nullopt;
721 unsigned WaveSize = ST.getWavefrontSize();
723 for (
unsigned Lane :
seq(WaveSize)) {
725 if (!Val || *Val >= WaveSize)
734template <
unsigned Period>
736 static_assert(
isPowerOf2_32(Period),
"Period must be a power of two");
737 for (
unsigned I = Period,
E = Ids.
size();
I <
E; ++
I)
738 if (Ids[
I] != Ids[
I % Period] + (
I & ~(Period - 1)))
746 for (
unsigned I = 0;
I <
N; ++
I)
762 return Ids[3] << 6 | Ids[2] << 4 | Ids[1] << 2 | Ids[0];
769 for (
unsigned J = 0; J <
N; ++J)
770 if (Ids[J] != (
N - 1) - J)
782 for (
unsigned J = 1; J < 16; ++J)
783 if (Ids[J] != (Ids[0] + J) % 16)
801 unsigned Mask = Ids[0];
804 for (
unsigned J = 0; J < 16; ++J)
805 if (Ids[J] != (Mask ^ J))
815 unsigned Selector = 0;
816 for (
unsigned J = 0; J < 8; ++J)
817 Selector |= Ids[J] << (J * 3);
826 for (
unsigned J = 0; J < 16; ++J)
827 Sel |=
static_cast<uint64_t>(Ids[J] & 0xF) << (J * 4);
834 if (Ids.
size() != 64)
836 for (
unsigned J = 0; J < 64; ++J)
837 if (Ids[J] != (J ^ 32))
848 for (
unsigned J = 0; J < 16; ++J) {
849 if (Ids[J] < 16 || Ids[J] >= 32)
851 if (Ids[J + 16] != Ids[J] - 16)
862static std::optional<unsigned>
871 unsigned AndMask = 0, OrMask = 0, XorMask = 0;
872 for (
unsigned B = 0;
B < 5; ++
B) {
873 unsigned Bit0 = (Ids[0] >>
B) & 1;
874 unsigned Bit1 = (Ids[1u <<
B] >>
B) & 1;
877 XorMask |= Bit0 <<
B;
885 for (
unsigned I :
seq(32u)) {
886 unsigned Expected = ((
I & AndMask) | OrMask) ^ XorMask;
901static std::optional<unsigned>
912 for (
unsigned I = 0;
I < 32; ++
I)
913 if (Ids[
I] != (
I +
N) % 32)
925 return B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, {Ty},
927 B.getInt32(0xF),
B.getInt32(0xF),
B.getTrue()});
932 return B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp8, {Val->
getType()},
933 {Val,
B.getInt32(Selector)});
940 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane16, {Ty},
942 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
950 return B.CreateIntrinsic(Intrinsic::amdgcn_permlanex16, {Ty},
952 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
960 assert(
DL.getTypeSizeInBits(OrigTy) == 32 &&
961 "ds_swizzle only supports 32-bit operands");
965 Src =
B.CreatePtrToInt(Src, I32Ty);
966 else if (OrigTy != I32Ty)
967 Src =
B.CreateBitCast(Src, I32Ty);
968 Value *Result =
B.CreateIntrinsic(Intrinsic::amdgcn_ds_swizzle, {},
971 return B.CreateIntToPtr(Result, OrigTy);
973 return B.CreateBitCast(Result, OrigTy);
979 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {Val->
getType()},
990 [](
const auto &
E) {
return E.value() ==
E.index(); }))
1014 if (ST.hasDPPRowShare()) {
1019 if (ST.hasDPP() && ST.hasGFX10Insts()) {
1029 if (ST.hasPermlane16Insts()) {
1049 if (ST.hasDsSwizzleRotateMode()) {
1062static std::optional<Instruction *>
1066 if (
DL.getTypeSizeInBits(
II.getType()) != 32)
1067 return std::nullopt;
1069 if (!ST.isWaveSizeKnown())
1070 return std::nullopt;
1072 unsigned WaveSize = ST.getWavefrontSize();
1073 bool IsBpermute =
II.getIntrinsicID() == Intrinsic::amdgcn_ds_bpermute;
1074 Value *Src =
II.getArgOperand(IsBpermute ? 1 : 0);
1075 Value *Index =
II.getArgOperand(IsBpermute ? 0 : 1);
1080 for (
unsigned Lane :
seq(WaveSize)) {
1082 if (!Val || (*Val & 3) || (*Val >> 2) >= WaveSize)
1083 return std::nullopt;
1084 Ids[Lane] = *Val >> 2;
1088 return std::nullopt;
1093 return std::nullopt;
1097std::optional<Instruction *>
1101 case Intrinsic::amdgcn_implicitarg_ptr: {
1102 if (
II.getFunction()->hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
1104 uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*
II.getFunction());
1107 II.getAttributes().getRetDereferenceableOrNullBytes();
1108 if (CurrentOrNullBytes != 0) {
1111 uint64_t NewBytes = std::max(CurrentOrNullBytes, ImplicitArgBytes);
1114 II.removeRetAttr(Attribute::DereferenceableOrNull);
1118 uint64_t CurrentBytes =
II.getAttributes().getRetDereferenceableBytes();
1119 uint64_t NewBytes = std::max(CurrentBytes, ImplicitArgBytes);
1120 if (NewBytes != CurrentBytes) {
1126 return std::nullopt;
1128 case Intrinsic::amdgcn_rcp: {
1129 Value *Src =
II.getArgOperand(0);
1140 if (
II.isStrictFP())
1144 const APFloat &ArgVal =
C->getValueAPF();
1162 auto IID = SrcCI->getIntrinsicID();
1167 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
1177 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
1180 II.setFastMathFlags(InnerFMF);
1182 II.setCalledFunction(NewDecl);
1188 case Intrinsic::amdgcn_sqrt:
1189 case Intrinsic::amdgcn_rsq:
1190 case Intrinsic::amdgcn_tanh: {
1191 Value *Src =
II.getArgOperand(0);
1203 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
1205 II.getModule(), Intrinsic::sqrt, {II.getType()});
1206 II.setCalledFunction(NewDecl);
1212 case Intrinsic::amdgcn_log:
1213 case Intrinsic::amdgcn_exp2: {
1214 const bool IsLog = IID == Intrinsic::amdgcn_log;
1215 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
1216 Value *Src =
II.getArgOperand(0);
1226 if (
C->isInfinity()) {
1229 if (!
C->isNegative())
1233 if (IsExp &&
C->isNegative())
1237 if (
II.isStrictFP())
1241 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
1246 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
1248 : ConstantFP::get(Ty, 1.0);
1252 if (IsLog &&
C->isNegative())
1260 case Intrinsic::amdgcn_frexp_mant:
1261 case Intrinsic::amdgcn_frexp_exp: {
1262 Value *Src =
II.getArgOperand(0);
1268 if (IID == Intrinsic::amdgcn_frexp_mant) {
1270 II, ConstantFP::get(
II.getContext(), Significand));
1290 case Intrinsic::amdgcn_class: {
1291 Value *Src0 =
II.getArgOperand(0);
1292 Value *Src1 =
II.getArgOperand(1);
1296 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
1299 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
1320 case Intrinsic::amdgcn_cvt_pkrtz: {
1321 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
1334 return ConstantFP::get(HalfTy, Val);
1337 Value *Src =
nullptr;
1339 if (Src->getType()->isHalfTy())
1346 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
1347 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
1357 case Intrinsic::amdgcn_cvt_pknorm_i16:
1358 case Intrinsic::amdgcn_cvt_pknorm_u16:
1359 case Intrinsic::amdgcn_cvt_pk_i16:
1360 case Intrinsic::amdgcn_cvt_pk_u16: {
1361 Value *Src0 =
II.getArgOperand(0);
1362 Value *Src1 =
II.getArgOperand(1);
1374 case Intrinsic::amdgcn_cvt_off_f32_i4: {
1375 Value* Arg =
II.getArgOperand(0);
1389 constexpr size_t ResValsSize = 16;
1390 static constexpr float ResVals[ResValsSize] = {
1391 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
1392 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
1394 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
1397 case Intrinsic::amdgcn_ubfe:
1398 case Intrinsic::amdgcn_sbfe: {
1400 Value *Src =
II.getArgOperand(0);
1407 unsigned IntSize = Ty->getIntegerBitWidth();
1412 if ((Width & (IntSize - 1)) == 0) {
1417 if (Width >= IntSize) {
1419 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
1430 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
1434 bool Signed = IID == Intrinsic::amdgcn_sbfe;
1436 if (!CWidth || !COffset)
1446 if (
Offset + Width < IntSize) {
1450 RightShift->takeName(&
II);
1457 RightShift->takeName(&
II);
1460 case Intrinsic::amdgcn_exp:
1461 case Intrinsic::amdgcn_exp_row:
1462 case Intrinsic::amdgcn_exp_compr: {
1468 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
1470 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
1471 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
1472 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
1473 Value *Src =
II.getArgOperand(
I + 2);
1487 case Intrinsic::amdgcn_fmed3: {
1488 Value *Src0 =
II.getArgOperand(0);
1489 Value *Src1 =
II.getArgOperand(1);
1490 Value *Src2 =
II.getArgOperand(2);
1492 for (
Value *Src : {Src0, Src1, Src2}) {
1497 if (
II.isStrictFP())
1534 const APFloat *ConstSrc0 =
nullptr;
1535 const APFloat *ConstSrc1 =
nullptr;
1536 const APFloat *ConstSrc2 =
nullptr;
1541 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1561 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1584 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1604 CI->copyFastMathFlags(&
II);
1630 II.setArgOperand(0, Src0);
1631 II.setArgOperand(1, Src1);
1632 II.setArgOperand(2, Src2);
1642 ConstantFP::get(
II.getType(), Result));
1647 if (!ST->hasMed3_16())
1656 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1664 case Intrinsic::amdgcn_icmp:
1665 case Intrinsic::amdgcn_fcmp: {
1669 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1676 Value *Src0 =
II.getArgOperand(0);
1677 Value *Src1 =
II.getArgOperand(1);
1704 II.setArgOperand(0, Src1);
1705 II.setArgOperand(1, Src0);
1707 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1754 ? Intrinsic::amdgcn_fcmp
1755 : Intrinsic::amdgcn_icmp;
1760 unsigned Width = CmpType->getBitWidth();
1761 unsigned NewWidth = Width;
1769 else if (Width <= 32)
1771 else if (Width <= 64)
1776 if (Width != NewWidth) {
1786 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1789 Value *Args[] = {SrcLHS, SrcRHS,
1790 ConstantInt::get(CC->
getType(), SrcPred)};
1792 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1799 case Intrinsic::amdgcn_mbcnt_hi:
1804 case Intrinsic::amdgcn_mbcnt_lo: {
1817 if (std::optional<ConstantRange> ExistingRange =
II.getRange()) {
1818 ComputedRange = ComputedRange.
intersectWith(*ExistingRange);
1819 if (ComputedRange == *ExistingRange)
1823 II.addRangeRetAttr(ComputedRange);
1826 case Intrinsic::amdgcn_ballot: {
1827 Value *Arg =
II.getArgOperand(0);
1832 if (Src->isZero()) {
1837 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1844 {IC.Builder.getInt32Ty()},
1845 {II.getArgOperand(0)}),
1852 case Intrinsic::amdgcn_wavefrontsize: {
1853 if (ST->isWaveSizeKnown())
1855 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1858 case Intrinsic::amdgcn_wqm_vote: {
1865 case Intrinsic::amdgcn_kill: {
1867 if (!
C || !
C->getZExtValue())
1873 case Intrinsic::amdgcn_s_sendmsg:
1874 case Intrinsic::amdgcn_s_sendmsghalt: {
1880 Value *M0Val =
II.getArgOperand(1);
1886 decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
1888 if (!msgDoesNotUseM0(MsgId, *ST))
1892 II.dropUBImplyingAttrsAndMetadata();
1896 case Intrinsic::amdgcn_update_dpp: {
1897 Value *Old =
II.getArgOperand(0);
1902 if (BC->isNullValue() || RM->getZExtValue() != 0xF ||
1909 case Intrinsic::amdgcn_permlane16:
1910 case Intrinsic::amdgcn_permlane16_var:
1911 case Intrinsic::amdgcn_permlanex16:
1912 case Intrinsic::amdgcn_permlanex16_var: {
1914 Value *VDstIn =
II.getArgOperand(0);
1919 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1920 IID == Intrinsic::amdgcn_permlanex16)
1927 unsigned int BcIdx = FiIdx + 1;
1936 case Intrinsic::amdgcn_wave_shuffle:
1938 case Intrinsic::amdgcn_permlane64:
1939 case Intrinsic::amdgcn_readfirstlane:
1940 case Intrinsic::amdgcn_readlane:
1941 case Intrinsic::amdgcn_ds_bpermute: {
1943 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1944 const Use &Src =
II.getArgOperandUse(SrcIdx);
1948 if (IID == Intrinsic::amdgcn_readlane &&
1955 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1956 const Use &Lane =
II.getArgOperandUse(0);
1960 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1961 II.setCalledFunction(NewDecl);
1962 II.setOperand(0, Src);
1963 II.setOperand(1, NewLane);
1968 if (IID == Intrinsic::amdgcn_ds_bpermute)
1974 return std::nullopt;
1976 case Intrinsic::amdgcn_writelane: {
1980 return std::nullopt;
1982 case Intrinsic::amdgcn_trig_preop: {
1985 if (!
II.getType()->isDoubleTy())
1988 Value *Src =
II.getArgOperand(0);
1989 Value *Segment =
II.getArgOperand(1);
1998 if (StrippedSign != Src)
2001 if (
II.isStrictFP())
2023 unsigned Shift = SegmentVal * 53;
2028 static const uint32_t TwoByPi[] = {
2029 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
2030 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
2031 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
2032 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
2033 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
2034 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
2038 unsigned Idx = Shift >> 5;
2039 if (Idx + 2 >= std::size(TwoByPi)) {
2044 unsigned BShift = Shift & 0x1f;
2048 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
2052 int Scale = -53 - Shift;
2059 case Intrinsic::amdgcn_fmul_legacy: {
2060 Value *Op0 =
II.getArgOperand(0);
2061 Value *Op1 =
II.getArgOperand(1);
2063 for (
Value *Src : {Op0, Op1}) {
2084 case Intrinsic::amdgcn_fma_legacy: {
2085 Value *Op0 =
II.getArgOperand(0);
2086 Value *Op1 =
II.getArgOperand(1);
2087 Value *Op2 =
II.getArgOperand(2);
2089 for (
Value *Src : {Op0, Op1, Op2}) {
2111 II.getModule(), Intrinsic::fma,
II.getType()));
2116 case Intrinsic::amdgcn_is_shared:
2117 case Intrinsic::amdgcn_is_private: {
2118 Value *Src =
II.getArgOperand(0);
2128 case Intrinsic::amdgcn_make_buffer_rsrc: {
2129 Value *Src =
II.getArgOperand(0);
2132 return std::nullopt;
2134 case Intrinsic::amdgcn_raw_buffer_store_format:
2135 case Intrinsic::amdgcn_struct_buffer_store_format:
2136 case Intrinsic::amdgcn_raw_tbuffer_store:
2137 case Intrinsic::amdgcn_struct_tbuffer_store:
2138 case Intrinsic::amdgcn_image_store_1d:
2139 case Intrinsic::amdgcn_image_store_1darray:
2140 case Intrinsic::amdgcn_image_store_2d:
2141 case Intrinsic::amdgcn_image_store_2darray:
2142 case Intrinsic::amdgcn_image_store_2darraymsaa:
2143 case Intrinsic::amdgcn_image_store_2dmsaa:
2144 case Intrinsic::amdgcn_image_store_3d:
2145 case Intrinsic::amdgcn_image_store_cube:
2146 case Intrinsic::amdgcn_image_store_mip_1d:
2147 case Intrinsic::amdgcn_image_store_mip_1darray:
2148 case Intrinsic::amdgcn_image_store_mip_2d:
2149 case Intrinsic::amdgcn_image_store_mip_2darray:
2150 case Intrinsic::amdgcn_image_store_mip_3d:
2151 case Intrinsic::amdgcn_image_store_mip_cube: {
2156 if (ST->hasDefaultComponentBroadcast())
2158 else if (ST->hasDefaultComponentZero())
2163 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
2171 case Intrinsic::amdgcn_prng_b32: {
2172 auto *Src =
II.getArgOperand(0);
2176 return std::nullopt;
2178 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
2179 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
2180 Value *Src0 =
II.getArgOperand(0);
2181 Value *Src1 =
II.getArgOperand(1);
2187 auto getFormatNumRegs = [](
unsigned FormatVal) {
2188 switch (FormatVal) {
2202 bool MadeChange =
false;
2203 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
2204 unsigned Src1NumElts = getFormatNumRegs(BLGP);
2208 if (Src0Ty->getNumElements() > Src0NumElts) {
2215 if (Src1Ty->getNumElements() > Src1NumElts) {
2223 return std::nullopt;
2234 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
2235 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
2236 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
2237 Value *Src0 =
II.getArgOperand(1);
2238 Value *Src1 =
II.getArgOperand(3);
2244 bool MadeChange =
false;
2250 if (Src0Ty->getNumElements() > Src0NumElts) {
2257 if (Src1Ty->getNumElements() > Src1NumElts) {
2265 return std::nullopt;
2282 return std::nullopt;
2295 int DMaskIdx,
bool IsLoad) {
2298 :
II.getOperand(0)->getType());
2299 unsigned VWidth = IIVTy->getNumElements();
2302 Type *EltTy = IIVTy->getElementType();
2314 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
2319 DemandedElts = (1 << ActiveBits) - 1;
2321 if (UnusedComponentsAtFront > 0) {
2322 static const unsigned InvalidOffsetIdx = 0xf;
2325 switch (
II.getIntrinsicID()) {
2326 case Intrinsic::amdgcn_raw_buffer_load:
2327 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2330 case Intrinsic::amdgcn_s_buffer_load:
2334 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
2335 OffsetIdx = InvalidOffsetIdx;
2339 case Intrinsic::amdgcn_struct_buffer_load:
2340 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2345 OffsetIdx = InvalidOffsetIdx;
2349 if (OffsetIdx != InvalidOffsetIdx) {
2351 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
2352 auto *
Offset = Args[OffsetIdx];
2353 unsigned SingleComponentSizeInBits =
2355 unsigned OffsetAdd =
2356 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
2357 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
2374 unsigned NewDMaskVal = 0;
2375 unsigned OrigLdStIdx = 0;
2376 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
2377 const unsigned Bit = 1 << SrcIdx;
2378 if (!!(DMaskVal & Bit)) {
2379 if (!!DemandedElts[OrigLdStIdx])
2385 if (DMaskVal != NewDMaskVal)
2386 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
2389 unsigned NewNumElts = DemandedElts.
popcount();
2393 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
2395 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
2407 OverloadTys[0] = NewTy;
2411 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
2412 if (DemandedElts[OrigStoreIdx])
2415 if (NewNumElts == 1)
2422 II.getIntrinsicID(), OverloadTys, Args);
2425 AttributeList OldAttrList =
II.getAttributes();
2429 if (NewNumElts == 1) {
2435 unsigned NewLoadIdx = 0;
2436 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
2437 if (!!DemandedElts[OrigLoadIdx])
2453 APInt &UndefElts)
const {
2458 const unsigned FirstElt = DemandedElts.
countr_zero();
2460 const unsigned MaskLen = LastElt - FirstElt + 1;
2462 unsigned OldNumElts = VT->getNumElements();
2463 if (MaskLen == OldNumElts && MaskLen != 1)
2466 Type *EltTy = VT->getElementType();
2474 Value *Src =
II.getArgOperand(0);
2479 II.getOperandBundlesAsDefs(OpBundles);
2496 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2497 if (DemandedElts[FirstElt +
I])
2498 ExtractMask[
I] = FirstElt +
I;
2507 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2508 if (DemandedElts[FirstElt +
I])
2509 InsertMask[FirstElt +
I] =
I;
2521 SimplifyAndSetOp)
const {
2522 switch (
II.getIntrinsicID()) {
2523 case Intrinsic::amdgcn_readfirstlane:
2524 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2526 case Intrinsic::amdgcn_raw_buffer_load:
2527 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2528 case Intrinsic::amdgcn_raw_buffer_load_format:
2529 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
2530 case Intrinsic::amdgcn_raw_tbuffer_load:
2531 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
2532 case Intrinsic::amdgcn_s_buffer_load:
2533 case Intrinsic::amdgcn_struct_buffer_load:
2534 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2535 case Intrinsic::amdgcn_struct_buffer_load_format:
2536 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
2537 case Intrinsic::amdgcn_struct_tbuffer_load:
2538 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2541 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2547 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Value * createPermlane16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlane16 with the precomputed lane-select halves.
static std::optional< unsigned > matchRowSharePattern(ArrayRef< uint8_t > Ids)
Match a row-share pattern: all 16 lanes of each row read the same source lane.
static bool matchMirrorPattern(ArrayRef< uint8_t > Ids)
Match an N-lane reversal (mirror) pattern.
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat, bool AllowI16SExt=false)
static bool tryBuildShuffleMap(Value *Index, const GCNSubtarget &ST, SmallVectorImpl< uint8_t > &Ids, const DataLayout &DL)
Build the per-lane shuffle map by evaluating Index for every lane in the wave.
static std::optional< unsigned > matchQuadPermPattern(ArrayRef< uint8_t > Ids)
Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids...
static std::optional< unsigned > matchDsSwizzleRotatePattern(ArrayRef< uint8_t > Ids)
Match a GFX9+ DS_SWIZZLE rotate-mode permutation: a cyclic left-rotation of all 32 lanes within each ...
static std::optional< unsigned > matchHalfRowPermPattern(ArrayRef< uint8_t > Ids)
Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per ...
static std::optional< unsigned > matchRowXMaskPattern(ArrayRef< uint8_t > Ids)
Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1,...
static constexpr auto matchHalfRowMirrorPattern
static Value * createPermlaneX16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlanex16 with the precomputed lane-select halves.
static bool isRowPattern(ArrayRef< uint8_t > Ids)
Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row,...
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static constexpr auto isFullRowPattern
static constexpr auto isQuadPattern
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static uint64_t computePermlane16Masks(ArrayRef< uint8_t > Ids)
Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4...
static bool matchHalfWaveSwapPattern(ArrayRef< uint8_t > Ids)
Match a half-wave swap: lane J reads from lane J ^ 32.
static bool hasPeriodicLayout(ArrayRef< uint8_t > Ids)
Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = I...
static std::optional< Instruction * > tryOptimizeShufflePattern(InstCombiner &IC, IntrinsicInst &II, const GCNSubtarget &ST)
Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a ...
static constexpr auto isHalfRowPattern
static APInt defaultComponentBroadcast(Value *V)
static std::optional< unsigned > matchDsSwizzleBitmaskPattern(ArrayRef< uint8_t > Ids)
Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask ...
static Value * createDsSwizzle(IRBuilderBase &B, Value *Val, unsigned Offset, const DataLayout &DL)
Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 a...
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static Value * matchShuffleToHWIntrinsic(IRBuilderBase &B, Value *Src, ArrayRef< uint8_t > Ids, const GCNSubtarget &ST, const DataLayout &DL)
Given a shuffle map, try to emit the best hardware intrinsic.
static std::optional< unsigned > matchRowRotatePattern(ArrayRef< uint8_t > Ids)
Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].
static bool isCrossRowPattern(ArrayRef< uint8_t > Ids)
Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads fr...
static bool isThreadID(const GCNSubtarget &ST, Value *V)
static Value * createUpdateDpp(IRBuilderBase &B, Value *Val, unsigned Ctrl)
Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds...
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static Value * createMovDpp8(IRBuilderBase &B, Value *Val, unsigned Selector)
Emit v_mov_b32_dpp8 with the given 24-bit lane selector.
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
static constexpr auto matchFullRowMirrorPattern
static std::optional< unsigned > evalLaneExpr(Value *V, unsigned Lane, const GCNSubtarget &ST, const DataLayout &DL, unsigned Depth=0)
Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a c...
static Value * createPermlane64(IRBuilderBase &B, Value *Val)
Emit v_permlane64 (swap of the two 32-lane halves of a wave64).
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
Provides some synthesis utilities to produce sequences of values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool bitwiseIsEqual(const APFloat &RHS) const
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
size_t size() const
Get the array size.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
void setAttributes(AttributeList A)
Set the attributes for this call.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
This is an important base class in LLVM.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Tagged union holding either a T or an Error.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
LLVM_ABI CallInst * CreateIntrinsicWithoutFolding(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
ConstantInt * getTrue()
Get the constant value for i1 true.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
LLVM_ABI Value * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={}, function_ref< void(CallInst *)> SetFn=[](CallInst *) {})
Variant to create a possibly constant-folded intrinsic.
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximumnum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
static Value * stripSignOnlyFPOps(Value *Val)
Ignore all operations which only change the sign of a value, returning the underlying magnitude value...
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
auto dyn_cast_or_null(const Y &Val)
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
constexpr unsigned MaxAnalysisRecursionDepth
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.