22#include "llvm/IR/IntrinsicsAMDGPU.h"
29#define DEBUG_TYPE "AMDGPUtti"
33struct AMDGPUImageDMaskIntrinsic {
37#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
38#include "AMDGPUGenSearchableTables.inc"
68 Type *VTy = V.getType();
77 APFloat FloatValue(ConstFloat->getValueAPF());
78 bool LosesInfo =
true;
87 APInt IntValue(ConstInt->getValue());
106 Type *VTy = V.getType();
144 bool RemoveOldIntr = &OldIntr != &InstToReplace;
153static std::optional<Instruction *>
158 if (
const auto *LZMappingInfo =
160 if (
auto *ConstantLod =
162 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
167 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
168 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
175 if (
const auto *MIPMappingInfo =
177 if (
auto *ConstantMip =
179 if (ConstantMip->isZero()) {
184 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
185 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
192 if (
const auto *BiasMappingInfo =
194 if (
auto *ConstantBias =
196 if (ConstantBias->isZero()) {
201 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
202 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
203 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
210 if (
const auto *OffsetMappingInfo =
212 if (
auto *ConstantOffset =
214 if (ConstantOffset->isZero()) {
217 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
219 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
220 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
227 if (ST->hasD16Images()) {
237 if (
II.hasOneUse()) {
240 if (
User->getOpcode() == Instruction::FPTrunc &&
244 [&](
auto &Args,
auto &ArgTys) {
247 ArgTys[0] = User->getType();
256 bool AllHalfExtracts =
true;
258 for (
User *U :
II.users()) {
260 if (!Ext || !Ext->hasOneUse()) {
261 AllHalfExtracts =
false;
266 if (!Tr || !Tr->getType()->isHalfTy()) {
267 AllHalfExtracts =
false;
274 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
283 SigTys[0] = HalfVecTy;
289 II.mutateType(HalfVecTy);
290 II.setCalledFunction(HalfDecl);
293 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
294 Value *Idx = Ext->getIndexOperand();
296 Builder.SetInsertPoint(Tr);
298 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
301 Tr->replaceAllUsesWith(HalfExtract);
304 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
315 if (!ST->hasA16() && !ST->hasG16())
322 bool FloatCoord =
false;
324 bool OnlyDerivatives =
false;
327 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
328 Value *Coord =
II.getOperand(OperandIndex);
331 if (OperandIndex < ImageDimIntr->CoordStart ||
336 OnlyDerivatives =
true;
345 if (!OnlyDerivatives && !ST->hasA16())
346 OnlyDerivatives =
true;
349 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
352 "Only image instructions with a sampler can have a bias");
354 OnlyDerivatives =
true;
357 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
365 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
366 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
367 if (!OnlyDerivatives) {
368 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
371 if (ImageDimIntr->NumBiasArgs != 0)
372 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
378 OperandIndex < EndIndex; OperandIndex++) {
380 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
385 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
386 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
415 Value *Src =
nullptr;
418 if (Src->getType()->isHalfTy())
435 unsigned VWidth = VTy->getNumElements();
438 for (
int i = VWidth - 1; i > 0; --i) {
460 unsigned VWidth = VTy->getNumElements();
466 SVI->getShuffleMask(ShuffleMask);
468 for (
int I = VWidth - 1;
I > 0; --
I) {
469 if (ShuffleMask.empty()) {
520 unsigned LaneArgIdx)
const {
521 unsigned MaskBits = ST->getWavefrontSizeLog2();
535 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
538 if (MaskedConst != LaneArg) {
539 II.getOperandUse(LaneArgIdx).set(MaskedConst);
551 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
567 if (ST.isWave32() &&
match(V, W32Pred))
569 if (ST.isWave64() &&
match(V, W64Pred))
577static std::optional<Instruction *>
579 Value *Val =
II.getArgOperand(0);
580 Value *Idx =
II.getArgOperand(1);
584 if (!ST.isWaveSizeKnown() || !ST.hasDPPRowShare())
590 bool CanDPP16RowShare =
false;
594 uint64_t MaskCheck = (1UL << ST.getWavefrontSizeLog2()) - 1;
595 uint64_t MaskTarget = MaskCheck & 0xF0;
608 if ((Mask & MaskCheck) != MaskTarget)
612 CanDPP16RowShare =
true;
613 }
else if (
match(Idx, RowSharePred) &&
isThreadID(ST, Tid) && RowIdx < 15 &&
615 if ((Mask & MaskCheck) != MaskTarget)
618 CanDPP16RowShare =
true;
621 CanDPP16RowShare =
true;
624 if (CanDPP16RowShare) {
626 B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, Val->
getType(),
627 {PoisonValue::get(Val->getType()), Val,
628 B.getInt32(AMDGPU::DPP::ROW_SHARE0 | RowIdx),
629 B.getInt32(0xF), B.getInt32(0xF), B.getFalse()});
642 const auto IID =
II.getIntrinsicID();
643 assert(IID == Intrinsic::amdgcn_readlane ||
644 IID == Intrinsic::amdgcn_readfirstlane ||
645 IID == Intrinsic::amdgcn_permlane64);
655 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
659 Value *LaneID =
nullptr;
661 LaneID =
II.getOperand(1);
675 const auto DoIt = [&](
unsigned OpIdx,
679 Ops.push_back(LaneID);
695 return DoIt(0,
II.getCalledFunction());
699 Type *SrcTy = Src->getType();
705 return DoIt(0, Remangled);
713 return DoIt(1,
II.getCalledFunction());
715 return DoIt(0,
II.getCalledFunction());
721std::optional<Instruction *>
725 case Intrinsic::amdgcn_rcp: {
726 Value *Src =
II.getArgOperand(0);
741 const APFloat &ArgVal =
C->getValueAPF();
759 auto IID = SrcCI->getIntrinsicID();
764 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
774 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
777 II.setFastMathFlags(InnerFMF);
779 II.setCalledFunction(NewDecl);
785 case Intrinsic::amdgcn_sqrt:
786 case Intrinsic::amdgcn_rsq:
787 case Intrinsic::amdgcn_tanh: {
788 Value *Src =
II.getArgOperand(0);
800 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
802 II.getModule(), Intrinsic::sqrt, {II.getType()});
803 II.setCalledFunction(NewDecl);
809 case Intrinsic::amdgcn_log:
810 case Intrinsic::amdgcn_exp2: {
811 const bool IsLog = IID == Intrinsic::amdgcn_log;
812 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
813 Value *Src =
II.getArgOperand(0);
823 if (
C->isInfinity()) {
826 if (!
C->isNegative())
830 if (IsExp &&
C->isNegative())
838 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
843 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
845 : ConstantFP::get(Ty, 1.0);
849 if (IsLog &&
C->isNegative())
857 case Intrinsic::amdgcn_frexp_mant:
858 case Intrinsic::amdgcn_frexp_exp: {
859 Value *Src =
II.getArgOperand(0);
865 if (IID == Intrinsic::amdgcn_frexp_mant) {
867 II, ConstantFP::get(
II.getContext(), Significand));
887 case Intrinsic::amdgcn_class: {
888 Value *Src0 =
II.getArgOperand(0);
889 Value *Src1 =
II.getArgOperand(1);
893 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
896 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
917 case Intrinsic::amdgcn_cvt_pkrtz: {
918 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
931 return ConstantFP::get(HalfTy, Val);
934 Value *Src =
nullptr;
936 if (Src->getType()->isHalfTy())
943 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
944 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
954 case Intrinsic::amdgcn_cvt_pknorm_i16:
955 case Intrinsic::amdgcn_cvt_pknorm_u16:
956 case Intrinsic::amdgcn_cvt_pk_i16:
957 case Intrinsic::amdgcn_cvt_pk_u16: {
958 Value *Src0 =
II.getArgOperand(0);
959 Value *Src1 =
II.getArgOperand(1);
971 case Intrinsic::amdgcn_cvt_off_f32_i4: {
972 Value* Arg =
II.getArgOperand(0);
986 constexpr size_t ResValsSize = 16;
987 static constexpr float ResVals[ResValsSize] = {
988 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
989 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
991 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
994 case Intrinsic::amdgcn_ubfe:
995 case Intrinsic::amdgcn_sbfe: {
997 Value *Src =
II.getArgOperand(0);
1004 unsigned IntSize = Ty->getIntegerBitWidth();
1009 if ((Width & (IntSize - 1)) == 0) {
1014 if (Width >= IntSize) {
1016 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
1027 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
1031 bool Signed = IID == Intrinsic::amdgcn_sbfe;
1033 if (!CWidth || !COffset)
1043 if (
Offset + Width < IntSize) {
1047 RightShift->takeName(&
II);
1054 RightShift->takeName(&
II);
1057 case Intrinsic::amdgcn_exp:
1058 case Intrinsic::amdgcn_exp_row:
1059 case Intrinsic::amdgcn_exp_compr: {
1065 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
1067 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
1068 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
1069 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
1070 Value *Src =
II.getArgOperand(
I + 2);
1084 case Intrinsic::amdgcn_fmed3: {
1085 Value *Src0 =
II.getArgOperand(0);
1086 Value *Src1 =
II.getArgOperand(1);
1087 Value *Src2 =
II.getArgOperand(2);
1089 for (
Value *Src : {Src0, Src1, Src2}) {
1094 if (
II.isStrictFP())
1131 const APFloat *ConstSrc0 =
nullptr;
1132 const APFloat *ConstSrc1 =
nullptr;
1133 const APFloat *ConstSrc2 =
nullptr;
1138 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1158 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1181 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1201 CI->copyFastMathFlags(&
II);
1227 II.setArgOperand(0, Src0);
1228 II.setArgOperand(1, Src1);
1229 II.setArgOperand(2, Src2);
1239 ConstantFP::get(
II.getType(), Result));
1244 if (!ST->hasMed3_16())
1253 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1261 case Intrinsic::amdgcn_icmp:
1262 case Intrinsic::amdgcn_fcmp: {
1266 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1273 Value *Src0 =
II.getArgOperand(0);
1274 Value *Src1 =
II.getArgOperand(1);
1294 II.getType(), Args);
1295 NewCall->
addFnAttr(Attribute::Convergent);
1303 II.setArgOperand(0, Src1);
1304 II.setArgOperand(1, Src0);
1306 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1353 ? Intrinsic::amdgcn_fcmp
1354 : Intrinsic::amdgcn_icmp;
1359 unsigned Width = CmpType->getBitWidth();
1360 unsigned NewWidth = Width;
1368 else if (Width <= 32)
1370 else if (Width <= 64)
1375 if (Width != NewWidth) {
1385 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1388 Value *Args[] = {SrcLHS, SrcRHS,
1389 ConstantInt::get(CC->
getType(), SrcPred)};
1391 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1398 case Intrinsic::amdgcn_mbcnt_hi: {
1404 case Intrinsic::amdgcn_ballot: {
1405 Value *Arg =
II.getArgOperand(0);
1410 if (Src->isZero()) {
1415 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1422 {IC.Builder.getInt32Ty()},
1423 {II.getArgOperand(0)}),
1430 case Intrinsic::amdgcn_wavefrontsize: {
1431 if (ST->isWaveSizeKnown())
1433 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1436 case Intrinsic::amdgcn_wqm_vote: {
1443 case Intrinsic::amdgcn_kill: {
1445 if (!
C || !
C->getZExtValue())
1451 case Intrinsic::amdgcn_update_dpp: {
1452 Value *Old =
II.getArgOperand(0);
1457 if (BC->isNullValue() || RM->getZExtValue() != 0xF ||
1464 case Intrinsic::amdgcn_permlane16:
1465 case Intrinsic::amdgcn_permlane16_var:
1466 case Intrinsic::amdgcn_permlanex16:
1467 case Intrinsic::amdgcn_permlanex16_var: {
1469 Value *VDstIn =
II.getArgOperand(0);
1474 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1475 IID == Intrinsic::amdgcn_permlanex16)
1482 unsigned int BcIdx = FiIdx + 1;
1491 case Intrinsic::amdgcn_permlane64:
1492 case Intrinsic::amdgcn_readfirstlane:
1493 case Intrinsic::amdgcn_readlane:
1494 case Intrinsic::amdgcn_ds_bpermute: {
1496 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1497 const Use &Src =
II.getArgOperandUse(SrcIdx);
1501 if (IID == Intrinsic::amdgcn_readlane &&
1508 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1509 const Use &Lane =
II.getArgOperandUse(0);
1513 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1514 II.setCalledFunction(NewDecl);
1515 II.setOperand(0, Src);
1516 II.setOperand(1, NewLane);
1521 if (IID != Intrinsic::amdgcn_ds_bpermute) {
1526 return std::nullopt;
1528 case Intrinsic::amdgcn_writelane: {
1532 return std::nullopt;
1534 case Intrinsic::amdgcn_trig_preop: {
1537 if (!
II.getType()->isDoubleTy())
1540 Value *Src =
II.getArgOperand(0);
1541 Value *Segment =
II.getArgOperand(1);
1550 if (StrippedSign != Src)
1553 if (
II.isStrictFP())
1575 unsigned Shift = SegmentVal * 53;
1580 static const uint32_t TwoByPi[] = {
1581 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
1582 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
1583 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
1584 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
1585 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
1586 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
1590 unsigned Idx = Shift >> 5;
1591 if (Idx + 2 >= std::size(TwoByPi)) {
1596 unsigned BShift = Shift & 0x1f;
1600 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
1604 int Scale = -53 - Shift;
1611 case Intrinsic::amdgcn_fmul_legacy: {
1612 Value *Op0 =
II.getArgOperand(0);
1613 Value *Op1 =
II.getArgOperand(1);
1615 for (
Value *Src : {Op0, Op1}) {
1636 case Intrinsic::amdgcn_fma_legacy: {
1637 Value *Op0 =
II.getArgOperand(0);
1638 Value *Op1 =
II.getArgOperand(1);
1639 Value *Op2 =
II.getArgOperand(2);
1641 for (
Value *Src : {Op0, Op1, Op2}) {
1663 II.getModule(), Intrinsic::fma,
II.getType()));
1668 case Intrinsic::amdgcn_is_shared:
1669 case Intrinsic::amdgcn_is_private: {
1670 Value *Src =
II.getArgOperand(0);
1680 case Intrinsic::amdgcn_make_buffer_rsrc: {
1681 Value *Src =
II.getArgOperand(0);
1684 return std::nullopt;
1686 case Intrinsic::amdgcn_raw_buffer_store_format:
1687 case Intrinsic::amdgcn_struct_buffer_store_format:
1688 case Intrinsic::amdgcn_raw_tbuffer_store:
1689 case Intrinsic::amdgcn_struct_tbuffer_store:
1690 case Intrinsic::amdgcn_image_store_1d:
1691 case Intrinsic::amdgcn_image_store_1darray:
1692 case Intrinsic::amdgcn_image_store_2d:
1693 case Intrinsic::amdgcn_image_store_2darray:
1694 case Intrinsic::amdgcn_image_store_2darraymsaa:
1695 case Intrinsic::amdgcn_image_store_2dmsaa:
1696 case Intrinsic::amdgcn_image_store_3d:
1697 case Intrinsic::amdgcn_image_store_cube:
1698 case Intrinsic::amdgcn_image_store_mip_1d:
1699 case Intrinsic::amdgcn_image_store_mip_1darray:
1700 case Intrinsic::amdgcn_image_store_mip_2d:
1701 case Intrinsic::amdgcn_image_store_mip_2darray:
1702 case Intrinsic::amdgcn_image_store_mip_3d:
1703 case Intrinsic::amdgcn_image_store_mip_cube: {
1708 if (ST->hasDefaultComponentBroadcast())
1710 else if (ST->hasDefaultComponentZero())
1715 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
1723 case Intrinsic::amdgcn_prng_b32: {
1724 auto *Src =
II.getArgOperand(0);
1728 return std::nullopt;
1730 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
1731 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
1732 Value *Src0 =
II.getArgOperand(0);
1733 Value *Src1 =
II.getArgOperand(1);
1739 auto getFormatNumRegs = [](
unsigned FormatVal) {
1740 switch (FormatVal) {
1754 bool MadeChange =
false;
1755 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
1756 unsigned Src1NumElts = getFormatNumRegs(BLGP);
1760 if (Src0Ty->getNumElements() > Src0NumElts) {
1767 if (Src1Ty->getNumElements() > Src1NumElts) {
1775 return std::nullopt;
1786 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
1787 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
1788 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
1789 Value *Src0 =
II.getArgOperand(1);
1790 Value *Src1 =
II.getArgOperand(3);
1796 bool MadeChange =
false;
1802 if (Src0Ty->getNumElements() > Src0NumElts) {
1809 if (Src1Ty->getNumElements() > Src1NumElts) {
1817 return std::nullopt;
1829 case Intrinsic::amdgcn_wave_shuffle: {
1831 return std::nullopt;
1840 return std::nullopt;
1853 int DMaskIdx,
bool IsLoad) {
1856 :
II.getOperand(0)->getType());
1857 unsigned VWidth = IIVTy->getNumElements();
1860 Type *EltTy = IIVTy->getElementType();
1872 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
1877 DemandedElts = (1 << ActiveBits) - 1;
1879 if (UnusedComponentsAtFront > 0) {
1880 static const unsigned InvalidOffsetIdx = 0xf;
1883 switch (
II.getIntrinsicID()) {
1884 case Intrinsic::amdgcn_raw_buffer_load:
1885 case Intrinsic::amdgcn_raw_ptr_buffer_load:
1888 case Intrinsic::amdgcn_s_buffer_load:
1892 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
1893 OffsetIdx = InvalidOffsetIdx;
1897 case Intrinsic::amdgcn_struct_buffer_load:
1898 case Intrinsic::amdgcn_struct_ptr_buffer_load:
1903 OffsetIdx = InvalidOffsetIdx;
1907 if (OffsetIdx != InvalidOffsetIdx) {
1909 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
1910 auto *
Offset = Args[OffsetIdx];
1911 unsigned SingleComponentSizeInBits =
1913 unsigned OffsetAdd =
1914 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
1915 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
1932 unsigned NewDMaskVal = 0;
1933 unsigned OrigLdStIdx = 0;
1934 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
1935 const unsigned Bit = 1 << SrcIdx;
1936 if (!!(DMaskVal & Bit)) {
1937 if (!!DemandedElts[OrigLdStIdx])
1943 if (DMaskVal != NewDMaskVal)
1944 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
1947 unsigned NewNumElts = DemandedElts.
popcount();
1951 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
1953 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
1965 OverloadTys[0] = NewTy;
1969 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
1970 if (DemandedElts[OrigStoreIdx])
1973 if (NewNumElts == 1)
1985 if (NewNumElts == 1) {
1991 unsigned NewLoadIdx = 0;
1992 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
1993 if (!!DemandedElts[OrigLoadIdx])
2009 APInt &UndefElts)
const {
2014 const unsigned FirstElt = DemandedElts.
countr_zero();
2016 const unsigned MaskLen = LastElt - FirstElt + 1;
2018 unsigned OldNumElts = VT->getNumElements();
2019 if (MaskLen == OldNumElts && MaskLen != 1)
2022 Type *EltTy = VT->getElementType();
2030 Value *Src =
II.getArgOperand(0);
2035 II.getOperandBundlesAsDefs(OpBundles);
2052 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2053 if (DemandedElts[FirstElt +
I])
2054 ExtractMask[
I] = FirstElt +
I;
2063 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2064 if (DemandedElts[FirstElt +
I])
2065 InsertMask[FirstElt +
I] =
I;
2077 SimplifyAndSetOp)
const {
2078 switch (
II.getIntrinsicID()) {
2079 case Intrinsic::amdgcn_readfirstlane:
2080 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2082 case Intrinsic::amdgcn_raw_buffer_load:
2083 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2084 case Intrinsic::amdgcn_raw_buffer_load_format:
2085 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
2086 case Intrinsic::amdgcn_raw_tbuffer_load:
2087 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
2088 case Intrinsic::amdgcn_s_buffer_load:
2089 case Intrinsic::amdgcn_struct_buffer_load:
2090 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2091 case Intrinsic::amdgcn_struct_buffer_load_format:
2092 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
2093 case Intrinsic::amdgcn_struct_tbuffer_load:
2094 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2097 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2103 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static APInt defaultComponentBroadcast(Value *V)
static std::optional< Instruction * > tryWaveShuffleDPP(const GCNSubtarget &ST, InstCombiner &IC, IntrinsicInst &II)
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static bool isThreadID(const GCNSubtarget &ST, Value *V)
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI Constant * getInfinity(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI Constant * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand (e.g.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
static Value * stripSignOnlyFPOps(Value *Val)
Ignore all operations which only change the sign of a value, returning the underlying magnitude value...
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type constraints specified by the ....
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
class_match< ConstantFP > m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.