22#include "llvm/IR/IntrinsicsAMDGPU.h"
29#define DEBUG_TYPE "AMDGPUtti"
33struct AMDGPUImageDMaskIntrinsic {
37#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38#include "AMDGPUGenSearchableTables.inc"
68 Type *VTy = V.getType();
77 APFloat FloatValue(ConstFloat->getValueAPF());
78 bool LosesInfo =
true;
87 APInt IntValue(ConstInt->getValue());
106 Type *VTy = V.getType();
144 bool RemoveOldIntr = &OldIntr != &InstToReplace;
153static std::optional<Instruction *>
158 if (
const auto *LZMappingInfo =
160 if (
auto *ConstantLod =
162 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
167 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
168 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
175 if (
const auto *MIPMappingInfo =
177 if (
auto *ConstantMip =
179 if (ConstantMip->isZero()) {
184 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
185 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
192 if (
const auto *BiasMappingInfo =
194 if (
auto *ConstantBias =
196 if (ConstantBias->isZero()) {
201 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
202 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
210 if (
const auto *OffsetMappingInfo =
212 if (
auto *ConstantOffset =
214 if (ConstantOffset->isZero()) {
217 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
219 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
220 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
227 if (ST->hasD16Images()) {
237 if (
II.hasOneUse()) {
240 if (
User->getOpcode() == Instruction::FPTrunc &&
244 [&](
auto &Args,
auto &ArgTys) {
247 ArgTys[0] = User->getType();
256 bool AllHalfExtracts =
true;
258 for (
User *U :
II.users()) {
260 if (!Ext || !Ext->hasOneUse()) {
261 AllHalfExtracts =
false;
266 if (!Tr || !Tr->getType()->isHalfTy()) {
267 AllHalfExtracts =
false;
274 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
283 SigTys[0] = HalfVecTy;
289 II.mutateType(HalfVecTy);
290 II.setCalledFunction(HalfDecl);
293 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
294 Value *Idx = Ext->getIndexOperand();
296 Builder.SetInsertPoint(Tr);
298 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
301 Tr->replaceAllUsesWith(HalfExtract);
304 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
315 if (!ST->hasA16() && !ST->hasG16())
322 bool FloatCoord =
false;
324 bool OnlyDerivatives =
false;
327 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
328 Value *Coord =
II.getOperand(OperandIndex);
331 if (OperandIndex < ImageDimIntr->CoordStart ||
336 OnlyDerivatives =
true;
345 if (!OnlyDerivatives && !ST->hasA16())
346 OnlyDerivatives =
true;
349 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
352 "Only image instructions with a sampler can have a bias");
354 OnlyDerivatives =
true;
357 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
365 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
366 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
367 if (!OnlyDerivatives) {
368 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
371 if (ImageDimIntr->NumBiasArgs != 0)
372 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
378 OperandIndex < EndIndex; OperandIndex++) {
380 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
385 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
386 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
415 Value *Src =
nullptr;
418 if (Src->getType()->isHalfTy())
435 unsigned VWidth = VTy->getNumElements();
438 for (
int i = VWidth - 1; i > 0; --i) {
460 unsigned VWidth = VTy->getNumElements();
466 SVI->getShuffleMask(ShuffleMask);
468 for (
int I = VWidth - 1;
I > 0; --
I) {
469 if (ShuffleMask.empty()) {
520 unsigned LaneArgIdx)
const {
521 unsigned MaskBits = ST->getWavefrontSizeLog2();
535 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
538 if (MaskedConst != LaneArg) {
539 II.getOperandUse(LaneArgIdx).set(MaskedConst);
551 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
567 if (ST.isWave32() &&
match(V, W32Pred))
569 if (ST.isWave64() &&
match(V, W64Pred))
577static std::optional<Instruction *>
579 Value *Val =
II.getArgOperand(0);
580 Value *Idx =
II.getArgOperand(1);
584 if (!ST.isWaveSizeKnown() || !ST.hasDPPRowShare())
590 bool CanDPP16RowShare =
false;
594 uint64_t MaskCheck = (1UL << ST.getWavefrontSizeLog2()) - 1;
595 uint64_t MaskTarget = MaskCheck & 0xF0;
608 if ((Mask & MaskCheck) != MaskTarget)
612 CanDPP16RowShare =
true;
613 }
else if (
match(Idx, RowSharePred) &&
isThreadID(ST, Tid) && RowIdx < 15 &&
615 if ((Mask & MaskCheck) != MaskTarget)
618 CanDPP16RowShare =
true;
621 CanDPP16RowShare =
true;
624 if (CanDPP16RowShare) {
626 B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Val->
getType(),
627 {PoisonValue::get(Val->getType()), Val,
628 B.getInt32(AMDGPU::DPP::ROW_SHARE0 | RowIdx),
629 B.getInt32(0xF), B.getInt32(0xF), B.getFalse()});
642 const auto IID =
II.getIntrinsicID();
643 assert(IID == Intrinsic::amdgcn_readlane ||
644 IID == Intrinsic::amdgcn_readfirstlane ||
645 IID == Intrinsic::amdgcn_permlane64);
655 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
659 Value *LaneID =
nullptr;
661 LaneID =
II.getOperand(1);
675 const auto DoIt = [&](
unsigned OpIdx,
679 Ops.push_back(LaneID);
695 return DoIt(0,
II.getCalledFunction());
699 Type *SrcTy = Src->getType();
705 return DoIt(0, Remangled);
713 return DoIt(1,
II.getCalledFunction());
715 return DoIt(0,
II.getCalledFunction());
721std::optional<Instruction *>
725 case Intrinsic::amdgcn_implicitarg_ptr: {
726 if (
II.getFunction()->hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
728 uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*
II.getFunction());
731 II.getAttributes().getRetDereferenceableOrNullBytes();
732 if (CurrentOrNullBytes != 0) {
735 uint64_t NewBytes = std::max(CurrentOrNullBytes, ImplicitArgBytes);
738 II.removeRetAttr(Attribute::DereferenceableOrNull);
742 uint64_t CurrentBytes =
II.getAttributes().getRetDereferenceableBytes();
743 uint64_t NewBytes = std::max(CurrentBytes, ImplicitArgBytes);
744 if (NewBytes != CurrentBytes) {
752 case Intrinsic::amdgcn_rcp: {
753 Value *Src =
II.getArgOperand(0);
768 const APFloat &ArgVal =
C->getValueAPF();
786 auto IID = SrcCI->getIntrinsicID();
791 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
801 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
804 II.setFastMathFlags(InnerFMF);
806 II.setCalledFunction(NewDecl);
812 case Intrinsic::amdgcn_sqrt:
813 case Intrinsic::amdgcn_rsq:
814 case Intrinsic::amdgcn_tanh: {
815 Value *Src =
II.getArgOperand(0);
827 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
829 II.getModule(), Intrinsic::sqrt, {II.getType()});
830 II.setCalledFunction(NewDecl);
836 case Intrinsic::amdgcn_log:
837 case Intrinsic::amdgcn_exp2: {
838 const bool IsLog = IID == Intrinsic::amdgcn_log;
839 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
840 Value *Src =
II.getArgOperand(0);
850 if (
C->isInfinity()) {
853 if (!
C->isNegative())
857 if (IsExp &&
C->isNegative())
865 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
870 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
872 : ConstantFP::get(Ty, 1.0);
876 if (IsLog &&
C->isNegative())
884 case Intrinsic::amdgcn_frexp_mant:
885 case Intrinsic::amdgcn_frexp_exp: {
886 Value *Src =
II.getArgOperand(0);
892 if (IID == Intrinsic::amdgcn_frexp_mant) {
894 II, ConstantFP::get(
II.getContext(), Significand));
914 case Intrinsic::amdgcn_class: {
915 Value *Src0 =
II.getArgOperand(0);
916 Value *Src1 =
II.getArgOperand(1);
920 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
923 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
944 case Intrinsic::amdgcn_cvt_pkrtz: {
945 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
958 return ConstantFP::get(HalfTy, Val);
961 Value *Src =
nullptr;
963 if (Src->getType()->isHalfTy())
970 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
971 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
981 case Intrinsic::amdgcn_cvt_pknorm_i16:
982 case Intrinsic::amdgcn_cvt_pknorm_u16:
983 case Intrinsic::amdgcn_cvt_pk_i16:
984 case Intrinsic::amdgcn_cvt_pk_u16: {
985 Value *Src0 =
II.getArgOperand(0);
986 Value *Src1 =
II.getArgOperand(1);
998 case Intrinsic::amdgcn_cvt_off_f32_i4: {
999 Value* Arg =
II.getArgOperand(0);
1013 constexpr size_t ResValsSize = 16;
1014 static constexpr float ResVals[ResValsSize] = {
1015 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
1016 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
1018 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
1021 case Intrinsic::amdgcn_ubfe:
1022 case Intrinsic::amdgcn_sbfe: {
1024 Value *Src =
II.getArgOperand(0);
1031 unsigned IntSize = Ty->getIntegerBitWidth();
1036 if ((Width & (IntSize - 1)) == 0) {
1041 if (Width >= IntSize) {
1043 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
1054 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
1058 bool Signed = IID == Intrinsic::amdgcn_sbfe;
1060 if (!CWidth || !COffset)
1070 if (
Offset + Width < IntSize) {
1074 RightShift->takeName(&
II);
1081 RightShift->takeName(&
II);
1084 case Intrinsic::amdgcn_exp:
1085 case Intrinsic::amdgcn_exp_row:
1086 case Intrinsic::amdgcn_exp_compr: {
1092 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
1094 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
1095 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
1096 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
1097 Value *Src =
II.getArgOperand(
I + 2);
1111 case Intrinsic::amdgcn_fmed3: {
1112 Value *Src0 =
II.getArgOperand(0);
1113 Value *Src1 =
II.getArgOperand(1);
1114 Value *Src2 =
II.getArgOperand(2);
1116 for (
Value *Src : {Src0, Src1, Src2}) {
1121 if (
II.isStrictFP())
1158 const APFloat *ConstSrc0 =
nullptr;
1159 const APFloat *ConstSrc1 =
nullptr;
1160 const APFloat *ConstSrc2 =
nullptr;
1165 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1185 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1208 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1228 CI->copyFastMathFlags(&
II);
1254 II.setArgOperand(0, Src0);
1255 II.setArgOperand(1, Src1);
1256 II.setArgOperand(2, Src2);
1266 ConstantFP::get(
II.getType(), Result));
1271 if (!ST->hasMed3_16())
1280 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1288 case Intrinsic::amdgcn_icmp:
1289 case Intrinsic::amdgcn_fcmp: {
1293 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1300 Value *Src0 =
II.getArgOperand(0);
1301 Value *Src1 =
II.getArgOperand(1);
1321 II.getType(), Args);
1322 NewCall->
addFnAttr(Attribute::Convergent);
1330 II.setArgOperand(0, Src1);
1331 II.setArgOperand(1, Src0);
1333 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1380 ? Intrinsic::amdgcn_fcmp
1381 : Intrinsic::amdgcn_icmp;
1386 unsigned Width = CmpType->getBitWidth();
1387 unsigned NewWidth = Width;
1395 else if (Width <= 32)
1397 else if (Width <= 64)
1402 if (Width != NewWidth) {
1412 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1415 Value *Args[] = {SrcLHS, SrcRHS,
1416 ConstantInt::get(CC->
getType(), SrcPred)};
1418 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1425 case Intrinsic::amdgcn_mbcnt_hi: {
1431 case Intrinsic::amdgcn_ballot: {
1432 Value *Arg =
II.getArgOperand(0);
1437 if (Src->isZero()) {
1442 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1449 {IC.Builder.getInt32Ty()},
1450 {II.getArgOperand(0)}),
1457 case Intrinsic::amdgcn_wavefrontsize: {
1458 if (ST->isWaveSizeKnown())
1460 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1463 case Intrinsic::amdgcn_wqm_vote: {
1470 case Intrinsic::amdgcn_kill: {
1472 if (!
C || !
C->getZExtValue())
1478 case Intrinsic::amdgcn_s_sendmsg:
1479 case Intrinsic::amdgcn_s_sendmsghalt: {
1485 Value *M0Val =
II.getArgOperand(1);
1491 decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
1493 if (!msgDoesNotUseM0(MsgId, *ST))
1497 II.dropUBImplyingAttrsAndMetadata();
1501 case Intrinsic::amdgcn_update_dpp: {
1502 Value *Old =
II.getArgOperand(0);
1507 if (BC->isNullValue() || RM->getZExtValue() != 0xF ||
1514 case Intrinsic::amdgcn_permlane16:
1515 case Intrinsic::amdgcn_permlane16_var:
1516 case Intrinsic::amdgcn_permlanex16:
1517 case Intrinsic::amdgcn_permlanex16_var: {
1519 Value *VDstIn =
II.getArgOperand(0);
1524 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1525 IID == Intrinsic::amdgcn_permlanex16)
1532 unsigned int BcIdx = FiIdx + 1;
1541 case Intrinsic::amdgcn_permlane64:
1542 case Intrinsic::amdgcn_readfirstlane:
1543 case Intrinsic::amdgcn_readlane:
1544 case Intrinsic::amdgcn_ds_bpermute: {
1546 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1547 const Use &Src =
II.getArgOperandUse(SrcIdx);
1551 if (IID == Intrinsic::amdgcn_readlane &&
1558 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1559 const Use &Lane =
II.getArgOperandUse(0);
1563 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1564 II.setCalledFunction(NewDecl);
1565 II.setOperand(0, Src);
1566 II.setOperand(1, NewLane);
1571 if (IID != Intrinsic::amdgcn_ds_bpermute) {
1576 return std::nullopt;
1578 case Intrinsic::amdgcn_writelane: {
1582 return std::nullopt;
1584 case Intrinsic::amdgcn_trig_preop: {
1587 if (!
II.getType()->isDoubleTy())
1590 Value *Src =
II.getArgOperand(0);
1591 Value *Segment =
II.getArgOperand(1);
1600 if (StrippedSign != Src)
1603 if (
II.isStrictFP())
1625 unsigned Shift = SegmentVal * 53;
1630 static const uint32_t TwoByPi[] = {
1631 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1632 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1633 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1634 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1635 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1636 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1640 unsigned Idx = Shift >> 5;
1641 if (Idx + 2 >= std::size(TwoByPi)) {
1646 unsigned BShift = Shift & 0x1f;
1650 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1654 int Scale = -53 - Shift;
1661 case Intrinsic::amdgcn_fmul_legacy: {
1662 Value *Op0 =
II.getArgOperand(0);
1663 Value *Op1 =
II.getArgOperand(1);
1665 for (
Value *Src : {Op0, Op1}) {
1686 case Intrinsic::amdgcn_fma_legacy: {
1687 Value *Op0 =
II.getArgOperand(0);
1688 Value *Op1 =
II.getArgOperand(1);
1689 Value *Op2 =
II.getArgOperand(2);
1691 for (
Value *Src : {Op0, Op1, Op2}) {
1713 II.getModule(), Intrinsic::fma,
II.getType()));
1718 case Intrinsic::amdgcn_is_shared:
1719 case Intrinsic::amdgcn_is_private: {
1720 Value *Src =
II.getArgOperand(0);
1730 case Intrinsic::amdgcn_make_buffer_rsrc: {
1731 Value *Src =
II.getArgOperand(0);
1734 return std::nullopt;
1736 case Intrinsic::amdgcn_raw_buffer_store_format:
1737 case Intrinsic::amdgcn_struct_buffer_store_format:
1738 case Intrinsic::amdgcn_raw_tbuffer_store:
1739 case Intrinsic::amdgcn_struct_tbuffer_store:
1740 case Intrinsic::amdgcn_image_store_1d:
1741 case Intrinsic::amdgcn_image_store_1darray:
1742 case Intrinsic::amdgcn_image_store_2d:
1743 case Intrinsic::amdgcn_image_store_2darray:
1744 case Intrinsic::amdgcn_image_store_2darraymsaa:
1745 case Intrinsic::amdgcn_image_store_2dmsaa:
1746 case Intrinsic::amdgcn_image_store_3d:
1747 case Intrinsic::amdgcn_image_store_cube:
1748 case Intrinsic::amdgcn_image_store_mip_1d:
1749 case Intrinsic::amdgcn_image_store_mip_1darray:
1750 case Intrinsic::amdgcn_image_store_mip_2d:
1751 case Intrinsic::amdgcn_image_store_mip_2darray:
1752 case Intrinsic::amdgcn_image_store_mip_3d:
1753 case Intrinsic::amdgcn_image_store_mip_cube: {
1758 if (ST->hasDefaultComponentBroadcast())
1760 else if (ST->hasDefaultComponentZero())
1765 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
1773 case Intrinsic::amdgcn_prng_b32: {
1774 auto *Src =
II.getArgOperand(0);
1778 return std::nullopt;
1780 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1781 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1782 Value *Src0 =
II.getArgOperand(0);
1783 Value *Src1 =
II.getArgOperand(1);
1789 auto getFormatNumRegs = [](
unsigned FormatVal) {
1790 switch (FormatVal) {
1804 bool MadeChange =
false;
1805 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
1806 unsigned Src1NumElts = getFormatNumRegs(BLGP);
1810 if (Src0Ty->getNumElements() > Src0NumElts) {
1817 if (Src1Ty->getNumElements() > Src1NumElts) {
1825 return std::nullopt;
1836 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
1837 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1838 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
1839 Value *Src0 =
II.getArgOperand(1);
1840 Value *Src1 =
II.getArgOperand(3);
1846 bool MadeChange =
false;
1852 if (Src0Ty->getNumElements() > Src0NumElts) {
1859 if (Src1Ty->getNumElements() > Src1NumElts) {
1867 return std::nullopt;
1879 case Intrinsic::amdgcn_wave_shuffle: {
1881 return std::nullopt;
1890 return std::nullopt;
1903 int DMaskIdx,
bool IsLoad) {
1906 :
II.getOperand(0)->getType());
1907 unsigned VWidth = IIVTy->getNumElements();
1910 Type *EltTy = IIVTy->getElementType();
1922 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
1927 DemandedElts = (1 << ActiveBits) - 1;
1929 if (UnusedComponentsAtFront > 0) {
1930 static const unsigned InvalidOffsetIdx = 0xf;
1933 switch (
II.getIntrinsicID()) {
1934 case Intrinsic::amdgcn_raw_buffer_load:
1935 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1938 case Intrinsic::amdgcn_s_buffer_load:
1942 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1943 OffsetIdx = InvalidOffsetIdx;
1947 case Intrinsic::amdgcn_struct_buffer_load:
1948 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1953 OffsetIdx = InvalidOffsetIdx;
1957 if (OffsetIdx != InvalidOffsetIdx) {
1959 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1960 auto *
Offset = Args[OffsetIdx];
1961 unsigned SingleComponentSizeInBits =
1963 unsigned OffsetAdd =
1964 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1965 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
1982 unsigned NewDMaskVal = 0;
1983 unsigned OrigLdStIdx = 0;
1984 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1985 const unsigned Bit = 1 << SrcIdx;
1986 if (!!(DMaskVal & Bit)) {
1987 if (!!DemandedElts[OrigLdStIdx])
1993 if (DMaskVal != NewDMaskVal)
1994 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
1997 unsigned NewNumElts = DemandedElts.
popcount();
2001 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
2003 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
2015 OverloadTys[0] = NewTy;
2019 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
2020 if (DemandedElts[OrigStoreIdx])
2023 if (NewNumElts == 1)
2035 if (NewNumElts == 1) {
2041 unsigned NewLoadIdx = 0;
2042 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
2043 if (!!DemandedElts[OrigLoadIdx])
2059 APInt &UndefElts)
const {
2064 const unsigned FirstElt = DemandedElts.
countr_zero();
2066 const unsigned MaskLen = LastElt - FirstElt + 1;
2068 unsigned OldNumElts = VT->getNumElements();
2069 if (MaskLen == OldNumElts && MaskLen != 1)
2072 Type *EltTy = VT->getElementType();
2080 Value *Src =
II.getArgOperand(0);
2085 II.getOperandBundlesAsDefs(OpBundles);
2102 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2103 if (DemandedElts[FirstElt +
I])
2104 ExtractMask[
I] = FirstElt +
I;
2113 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2114 if (DemandedElts[FirstElt +
I])
2115 InsertMask[FirstElt +
I] =
I;
2127 SimplifyAndSetOp)
const {
2128 switch (
II.getIntrinsicID()) {
2129 case Intrinsic::amdgcn_readfirstlane:
2130 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2132 case Intrinsic::amdgcn_raw_buffer_load:
2133 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2134 case Intrinsic::amdgcn_raw_buffer_load_format:
2135 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
2136 case Intrinsic::amdgcn_raw_tbuffer_load:
2137 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
2138 case Intrinsic::amdgcn_s_buffer_load:
2139 case Intrinsic::amdgcn_struct_buffer_load:
2140 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2141 case Intrinsic::amdgcn_struct_buffer_load_format:
2142 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
2143 case Intrinsic::amdgcn_struct_tbuffer_load:
2144 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2147 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2153 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static APInt defaultComponentBroadcast(Value *V)
static std::optional< Instruction * > tryWaveShuffleDPP(const GCNSubtarget &ST, InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static bool isThreadID(const GCNSubtarget &ST, Value *V)
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
static Value * stripSignOnlyFPOps(Value *Val)
Ignore all operations which only change the sign of a value, returning the underlying magnitude value...
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< ConstantFP > m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.