#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "AMDGPUtti"

struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"
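// Constant fold llvm.amdgcn.fmed3 for ordinary (non-NaN) inputs: the median
// of three values is the maximum of the two operands that remain once the
// overall maximum (Max3) is removed. NaNs are handled separately by the
// caller.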
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
                           const APFloat &Src2) {
  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);

  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp0 == APFloat::cmpEqual)
    return maxnum(Src1, Src2);

  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp1 == APFloat::cmpEqual)
    return maxnum(Src0, Src2);

  return maxnum(Src0, Src1);
}
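// Check whether a value can be converted to a 16-bit value without losing
// precision: either a constant that round-trips through the narrower type,
// or a value that was just extended from a 16-bit type.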
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
  Type *VTy = V.getType();
  // ...
  if (IsFloat) {
    if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
      // Converting the constant down to half must not lose precision.
      APFloat FloatValue(ConstFloat->getValueAPF());
      bool LosesInfo = true;
      FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
                         &LosesInfo);
      return !LosesInfo;
    }
  } else {
    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
      // Likewise, the integer constant must fit in 16 bits.
      APInt IntValue(ConstInt->getValue());
      return IntValue.getActiveBits() <= 16;
    }
  }
  // ... (also accept values that were just extended from 16 bits)
  return false;
}
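// Convert a value to 16 bits, stripping a widening cast when one is present.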
static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
  Type *VTy = V.getType();
  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
    return cast<Instruction>(&V)->getOperand(0);
  if (VTy->isIntegerTy())
    return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
  if (VTy->isFloatingPointTy())
    return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));

  llvm_unreachable("Should never be called!");
}
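/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates an intrinsic call with
/// the modified arguments (based on OldIntr), and replaces InstToReplace with
/// the newly created intrinsic call.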
  if (isa<FPMathOperator>(NewCall))
    NewCall->copyFastMathFlags(&OldIntr);
  // ...
  // The old intrinsic is erased separately unless it is itself the
  // instruction being replaced.
  bool RemoveOldIntr = &OldIntr != &InstToReplace;
  // ...
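// Rewrite image intrinsics into cheaper variants: drop constant-zero
// lod/mip/bias/offset operands, fold a following fptrunc into a D16 return
// type, and narrow coordinates or derivatives to 16 bits (A16/G16) when the
// subtarget allows it.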
static std::optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
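  // Optimize _L to _LZ when the lod is a known non-positive constant.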
  if (const auto *LZMappingInfo =
          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantLod =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->LodIndex);
            });
      }
    }
  }
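  // Optimize _mip away when the mip level is the constant zero.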
  if (const auto *MIPMappingInfo =
          AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantMip =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
      if (ConstantMip->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->MipIndex);
            });
      }
    }
  }
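  // Drop a constant-zero bias operand, together with its overload type.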
  if (const auto *BiasMappingInfo =
          AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantBias =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
      if (ConstantBias->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
              ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
            });
      }
    }
  }
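  // Drop a constant-zero offset operand.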
  if (const auto *OffsetMappingInfo =
          AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantOffset =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
      if (ConstantOffset->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(
                OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
            });
      }
    }
  }
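  // Try to use D16: if the only use of the image intrinsic is an fptrunc to
  // half, fold the truncation into the intrinsic's return type.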
  if (ST->hasD16Images()) {
    // ... (the base opcode must support D16)
    if (II.hasOneUse()) {
      Instruction *User = II.user_back();

      if (User->getOpcode() == Instruction::FPTrunc &&
          User->getType()->getScalarType()->isHalfTy()) {
        return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
                                   [&](auto &Args, auto &ArgTys) {
                                     // Change the return type of the image
                                     // intrinsic to the fptrunc's type.
                                     ArgTys[0] = User->getType();
                                   });
      }
    }
  }
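  // Try to use A16 or G16: narrow coordinates and/or derivatives to 16 bits.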
  if (!ST->hasA16() && !ST->hasG16())
    return std::nullopt;

  // Addresses are interpreted as float if the instruction has a sampler, or
  // as unsigned int if there is no sampler.
  bool HasSampler =
      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
  bool FloatCoord = false;
  // True means only the derivatives can be converted, not the coordinates.
  bool OnlyDerivatives = false;

  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
    Value *Coord = II.getOperand(OperandIndex);
    // If the values are not derived from 16-bit values, we cannot optimize.
    if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
      if (OperandIndex < ImageDimIntr->CoordStart ||
          ImageDimIntr->GradientStart == ImageDimIntr->CoordStart)
        return std::nullopt;
      // All gradients can be converted, so convert only them.
      OnlyDerivatives = true;
      break;
    }
    // ...
    FloatCoord = Coord->getType()->isFloatingPointTy();
  }

  if (!OnlyDerivatives && !ST->hasA16())
    OnlyDerivatives = true; // Only supports G16

  // Check if there is a bias parameter and if it can be converted to f16.
  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
    assert(HasSampler &&
           "Only image instructions with a sampler can have a bias");
    if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
      OnlyDerivatives = true;
  }
  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
                                               ImageDimIntr->CoordStart))
    return std::nullopt;

  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());

  return modifyIntrinsicCall(
      II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
        ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
        if (!OnlyDerivatives) {
          ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
          // Change the bias type as well.
          if (ImageDimIntr->NumBiasArgs != 0)
            ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
        }

        unsigned EndIndex =
            OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
        for (unsigned OperandIndex = ImageDimIntr->GradientStart;
             OperandIndex < EndIndex; OperandIndex++) {
          Args[OperandIndex] =
              convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
        }

        // Convert the bias.
        if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
          Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
          Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
        }
      });
}
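/// Match an fpext from half to float, or a constant that can be converted to
/// half exactly.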
static Value *matchFPExtFromF16(Value *Arg) {
  Value *Src = nullptr;
  if (match(Arg, m_OneUse(m_FPExt(m_Value(Src))))) {
    if (Src->getType()->isHalfTy())
      return Src;
  }
  // ... (also accept constants that convert to half without loss)
  return nullptr;
}
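// Compute the demanded elements of vector UseV by trimming components from
// the end that are known to be zero (or undef).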
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
                                       Instruction *I) {
  auto *VTy = cast<FixedVectorType>(UseV->getType());
  unsigned VWidth = VTy->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);

  for (int i = VWidth - 1; i > 0; --i) {
    auto *Elt = findScalarElement(UseV, i);
    if (!Elt)
      break;
    // Stop at the first component that is not zero or undef.
    if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
      if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
        break;
    } else {
      break;
    }
    DemandedElts.clearBit(i);
  }

  return DemandedElts;
}
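// Compute the demanded elements of vector V by trimming trailing components
// that are identical to the first element (the broadcast value).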
static APInt defaultComponentBroadcast(Value *V) {
  auto *VTy = cast<FixedVectorType>(V->getType());
  unsigned VWidth = VTy->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);
  Value *FirstComponent = findScalarElement(V, 0);

  SmallVector<int> ShuffleMask;
  if (auto *SVI = dyn_cast<ShuffleVectorInst>(V))
    SVI->getShuffleMask(ShuffleMask);

  for (int I = VWidth - 1; I > 0; --I) {
    if (ShuffleMask.empty()) {
      auto *Elt = findScalarElement(V, I);
      if (!Elt || (Elt != FirstComponent && !isa<UndefValue>(Elt)))
        break;
    } else {
      // A shufflevector broadcast shows up as repeated mask indices.
      if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
        break;
    }
    DemandedElts.clearBit(I);
  }

  return DemandedElts;
}
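// Return true if we can easily prove that use U is uniform across the wave.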
static bool isTriviallyUniform(const Use &U) {
  Value *V = U.get();
  if (isa<Constant>(V))
    return true;
  if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
    if (!AMDGPU::isIntrinsicAlwaysUniform(II->getIntrinsicID()))
      return false;
    // If II and U are in different blocks, the value of the use cannot be
    // updated.
    return II->getParent() == cast<Instruction>(U.getUser())->getParent();
  }
  return false;
}
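// Simplify a lane index operand (e.g. the src1 of llvm.amdgcn.readlane): the
// hardware only reads the low log2(wavefront size) bits of the lane index.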
bool GCNTTIImpl::simplifyDemandedLaneMaskArg(InstCombiner &IC,
                                             IntrinsicInst &II,
                                             unsigned LaneArgIdx) const {
  unsigned MaskBits = ST->getWavefrontSizeLog2();
  APInt DemandedMask(32, maskTrailingOnes<unsigned>(MaskBits));

  KnownBits Known(32);
  // ... (IC.SimplifyDemandedBits folds the ignored lane bits and fills Known)
  if (!Known.isConstant())
    return false;

  // Out-of-bounds indices may appear in wave64 code compiled for wave32.
  // SimplifyDemandedBits does not change constants, so mask them manually.
  Value *LaneArg = II.getArgOperand(LaneArgIdx);
  Constant *MaskedConst =
      ConstantInt::get(LaneArg->getType(), Known.getConstant() & DemandedMask);
  if (MaskedConst != LaneArg) {
    II.getOperandUse(LaneArgIdx).set(MaskedConst);
    return true;
  }
  return false;
}
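// Main InstCombine entry point for AMDGPU intrinsics: dispatch on the
// intrinsic ID and return the replacement instruction, or std::nullopt when
// no fold applies.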
std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::amdgcn_rcp: {
    Value *Src = II.getArgOperand(0);
    // rcp(undef) folds to QNaN.
    if (isa<UndefValue>(Src)) {
      // ...
    }
    // ...
    // Constant fold 1 / C; this is more precise than the instruction may
    // give.
    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1);
      Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
      return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
    }
    // ...
    auto *SrcCI = dyn_cast<IntrinsicInst>(Src);
    if (!SrcCI)
      break;

    auto IID = SrcCI->getIntrinsicID();
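    // With contraction allowed, the reciprocal of a square root folds to
    // rsq. Illustrative IR (not taken from the source):
    //   %s = call contract float @llvm.amdgcn.sqrt.f32(float %x)
    //   %r = call contract float @llvm.amdgcn.rcp.f32(float %s)
    // becomes
    //   %r = call contract float @llvm.amdgcn.rsq.f32(float %x)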
    if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
      // ... (both calls must allow contraction; InnerFMF merges their flags)
      Function *NewDecl = Intrinsic::getOrInsertDeclaration(
          SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});

      II.setFastMathFlags(InnerFMF);
      II.setCalledFunction(NewDecl);
      return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0));
    }
    break;
  }
  case Intrinsic::amdgcn_sqrt:
  case Intrinsic::amdgcn_rsq: {
    Value *Src = II.getArgOperand(0);
    // sqrt(undef) and rsq(undef) fold to QNaN.
    if (isa<UndefValue>(Src)) {
      // ...
    }
    // f16 amdgcn.sqrt is identical to the generic llvm.sqrt.
    if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
      Function *NewDecl = Intrinsic::getOrInsertDeclaration(
          II.getModule(), Intrinsic::sqrt, {II.getType()});
      II.setCalledFunction(NewDecl);
      return &II;
    }
    break;
  }
  case Intrinsic::amdgcn_log:
  case Intrinsic::amdgcn_exp2: {
    const bool IsLog = IID == Intrinsic::amdgcn_log;
    const bool IsExp = IID == Intrinsic::amdgcn_exp2;
    Value *Src = II.getArgOperand(0);
    Type *Ty = II.getType();

    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, Src);
    // ...
    if (ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      if (C->isInfinity()) {
        // exp2(+inf) -> +inf and log2(+inf) -> +inf.
        if (!C->isNegative())
          return IC.replaceInstUsesWith(II, C);

        // exp2(-inf) -> 0.
        if (IsExp && C->isNegative())
          return IC.replaceInstUsesWith(II, ConstantFP::getZero(Ty));
      }
      // ...
      // Quiet a NaN input.
      if (C->isNaN()) {
        Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
        return IC.replaceInstUsesWith(II, Quieted);
      }

      // The f32 instruction flushes denormal inputs; f16 handles them.
      if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
        Constant *FoldedValue = IsLog ? ConstantFP::getInfinity(Ty, true)
                                      : ConstantFP::get(Ty, 1.0);
        return IC.replaceInstUsesWith(II, FoldedValue);
      }

      // log2 of a negative value is NaN.
      if (IsLog && C->isNegative())
        return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
    }
    break;
  }
  case Intrinsic::amdgcn_frexp_mant:
  case Intrinsic::amdgcn_frexp_exp: {
    Value *Src = II.getArgOperand(0);
    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      int Exp;
      APFloat Significand =
          frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);

      if (IID == Intrinsic::amdgcn_frexp_mant) {
        return IC.replaceInstUsesWith(
            II, ConstantFP::get(II.getContext(), Significand));
      }
      // ... (frexp_exp folds to the extracted exponent)
    }

    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }
    break;
  }
  case Intrinsic::amdgcn_class: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    if (CMask) {
      // Rewrite to the generic llvm.is.fpclass, clamping any excess mask
      // bits, which are illegal for the generic intrinsic.
      II.setCalledOperand(Intrinsic::getOrInsertDeclaration(
          II.getModule(), Intrinsic::is_fpclass, Src0->getType()));
      II.setArgOperand(1, ConstantInt::get(Src1->getType(),
                                           CMask->getZExtValue() & fcAllFlags));
      return &II;
    }

    // Propagate poison.
    if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
    // ...
    break;
  }
  case Intrinsic::amdgcn_cvt_pkrtz: {
    // Fold an operand that is known to fit in a half: poison/undef
    // propagates, constants convert with round-toward-zero, and an fpext
    // from half is stripped.
    auto foldFPTruncToF16RTZ = [](Value *Arg) -> Value * {
      Type *HalfTy = Type::getHalfTy(Arg->getContext());

      if (isa<PoisonValue>(Arg))
        return PoisonValue::get(HalfTy);
      if (isa<UndefValue>(Arg))
        return UndefValue::get(HalfTy);

      if (ConstantFP *CFP = dyn_cast<ConstantFP>(Arg)) {
        bool LosesInfo;
        APFloat Val(CFP->getValueAPF());
        Val.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
        return ConstantFP::get(HalfTy, Val);
      }

      Value *Src = nullptr;
      if (match(Arg, m_FPExt(m_Value(Src)))) {
        if (Src->getType()->isHalfTy())
          return Src;
      }
      return nullptr;
    };

    // If both halves fold, build the <2 x half> result directly.
    if (Value *Src0 = foldFPTruncToF16RTZ(II.getArgOperand(0))) {
      if (Value *Src1 = foldFPTruncToF16RTZ(II.getArgOperand(1))) {
        // ...
      }
    }
    break;
  }
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    // Two undef operands fold to an undef result.
    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }
    break;
  }
  case Intrinsic::amdgcn_ubfe:
  case Intrinsic::amdgcn_sbfe: {
    // Decompose simple cases into standard shifts.
    Value *Src = II.getArgOperand(0);
    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, Src);
    }

    unsigned Width;
    Type *Ty = II.getType();
    unsigned IntSize = Ty->getIntegerBitWidth();

    ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CWidth) {
      Width = CWidth->getZExtValue();
      // A width that is a multiple of the bit size extracts nothing.
      if ((Width & (IntSize - 1)) == 0) {
        return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
      }
      // Hardware ignores the high bits of the width, so drop them.
      if (Width >= IntSize) {
        return IC.replaceOperand(
            II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
      }
    }

    unsigned Offset;
    ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
    if (COffset) {
      Offset = COffset->getZExtValue();
      // The offset is likewise taken modulo the bit size.
      if (Offset >= IntSize) {
        return IC.replaceOperand(
            II, 1,
            ConstantInt::get(COffset->getType(),
                             Offset & (IntSize - 1)));
      }
    }

    bool Signed = IID == Intrinsic::amdgcn_sbfe;

    if (!CWidth || !COffset)
      break;

    // A field that stops short of the top bit: shift left, then shift right
    // (arithmetic for sbfe, logical for ubfe) to extract and extend it.
    if (Offset + Width < IntSize) {
      Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
      Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
                                 : IC.Builder.CreateLShr(Shl, IntSize - Width);
      RightShift->takeName(&II);
      return IC.replaceInstUsesWith(II, RightShift);
    }

    // Otherwise a single right shift suffices.
    Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
                               : IC.Builder.CreateLShr(Src, Offset);
    RightShift->takeName(&II);
    return IC.replaceInstUsesWith(II, RightShift);
  }
  case Intrinsic::amdgcn_exp:
  case Intrinsic::amdgcn_exp_row:
  case Intrinsic::amdgcn_exp_compr: {
    // Replace export sources that are disabled by the 'en' bitmask with
    // undef.
    ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
    unsigned EnBits = En->getZExtValue();
    // ...
    bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
    bool Changed = false;
    for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
      if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
          (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
        Value *Src = II.getArgOperand(I + 2);
        if (!isa<UndefValue>(Src)) {
          II.setArgOperand(I + 2, UndefValue::get(Src->getType()));
          Changed = true;
        }
      }
    }
    if (Changed)
      return &II;
    break;
  }
  case Intrinsic::amdgcn_fmed3: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    Value *Src2 = II.getArgOperand(2);

    // A NaN or undef input reduces fmed3 to a min/max of the other two
    // operands.
    Value *V = nullptr;
    // ... (build the min/max for whichever operand is NaN or undef)
    if (V) {
      if (auto *CI = dyn_cast<CallInst>(V)) {
        CI->copyFastMathFlags(&II);
        CI->takeName(&II);
      }
      return IC.replaceInstUsesWith(II, V);
    }

    // Canonicalize constants to the RHS operands:
    // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
    bool Swap = false;
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }
    if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
      std::swap(Src1, Src2);
      Swap = true;
    }
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }
    if (Swap) {
      II.setArgOperand(0, Src0);
      II.setArgOperand(1, Src1);
      II.setArgOperand(2, Src2);
      return &II;
    }
    // All-constant operands fold through fmed3AMDGCN defined earlier.
    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
          APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
                                       C2->getValueAPF());
          return IC.replaceInstUsesWith(
              II, ConstantFP::get(IC.Builder.getContext(), Result));
        }
      }
    }
    // ...
    // fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z))
    if (Value *X = matchFPExtFromF16(Src0)) {
      if (Value *Y = matchFPExtFromF16(Src1)) {
        if (Value *Z = matchFPExtFromF16(Src2)) {
          Value *NewCall = IC.Builder.CreateIntrinsic(
              IID, {X->getType()}, {X, Y, Z}, &II, II.getName());
          // ... (fpext the half-width result back to the original type)
        }
      }
    }
    break;
  }
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp: {
    const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
    // Guard against invalid predicate arguments.
    int64_t CCVal = CC->getZExtValue();
    bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    // ... (break on out-of-range predicate values)

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
      if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
        // With two constant operands the comparison folds. The result of
        // V_ICMP/V_FCMP is masked with EXEC, so a comparison that is always
        // true is the same as a read of the EXEC register.
        // ... (NewCall reads the exec register via llvm.read_register)
        NewCall->addFnAttr(Attribute::Convergent);
        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }

      // Canonicalize constants to the RHS by swapping the predicate.
      CmpInst::Predicate SwapPred =
          CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
      II.setArgOperand(0, Src1);
      II.setArgOperand(1, Src0);
      II.setArgOperand(
          2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
      return &II;
    }
    // ... (match Src0 as a zext/sext of a compare of SrcLHS and SrcRHS with
    //      predicate SrcPred, compared against zero)
    Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
                               ? Intrinsic::amdgcn_fcmp
                               : Intrinsic::amdgcn_icmp;

    Type *Ty = SrcLHS->getType();
    if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
      // Promote to the next legal integer type.
      unsigned Width = CmpType->getBitWidth();
      unsigned NewWidth = Width;
      // ...
      if (Width <= 16)
        NewWidth = 16;
      else if (Width <= 32)
        NewWidth = 32;
      else if (Width <= 64)
        NewWidth = 64;
      else
        break; // Can't handle this.

      if (Width != NewWidth) {
        // ... (sign- or zero-extend SrcLHS and SrcRHS to the new width)
      }
    }
    // ...
    Value *Args[] = {SrcLHS, SrcRHS,
                     ConstantInt::get(CC->getType(), SrcPred)};
    CallInst *NewCall = IC.Builder.CreateIntrinsic(
        NewIID, {II.getType(), SrcLHS->getType()}, Args);
    NewCall->takeName(&II);
    return IC.replaceInstUsesWith(II, NewCall);
  }
  case Intrinsic::amdgcn_mbcnt_hi: {
    // exec_hi is all 0 on wave32, so this is just a copy.
    if (ST->isWave32())
      return IC.replaceInstUsesWith(II, II.getArgOperand(1));
    break;
  }
  case Intrinsic::amdgcn_ballot: {
    if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      // amdgcn.ballot(i1 0) is zero.
      if (Src->isZero())
        return IC.replaceInstUsesWith(II,
                                      Constant::getNullValue(II.getType()));
    }
    // On wave32, a 64-bit ballot is a 32-bit ballot zero-extended to i64.
    if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
      Value *Call = IC.Builder.CreateZExt(
          IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
                                     {IC.Builder.getInt32Ty()},
                                     {II.getArgOperand(0)}),
          II.getType());
      Call->takeName(&II);
      return IC.replaceInstUsesWith(II, Call);
    }
    break;
  }
  case Intrinsic::amdgcn_wavefrontsize: {
    // Fold to a constant when the wave size is known for the subtarget.
    if (ST->isWaveSizeKnown())
      return IC.replaceInstUsesWith(
          II, ConstantInt::get(II.getType(), ST->getWavefrontSize()));
    break;
  }
  case Intrinsic::amdgcn_wqm_vote: {
    // wqm_vote is identity when the argument is constant.
    if (!isa<Constant>(II.getArgOperand(0)))
      break;

    return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  }
  case Intrinsic::amdgcn_kill: {
    const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
    if (!C || !C->getZExtValue())
      break;

    // amdgcn.kill(i1 1) is a no-op.
    return IC.eraseInstFromFunction(II);
  }
  case Intrinsic::amdgcn_update_dpp: {
    Value *Old = II.getArgOperand(0);

    auto *BC = cast<ConstantInt>(II.getArgOperand(5));
    auto *RM = cast<ConstantInt>(II.getArgOperand(3));
    auto *BM = cast<ConstantInt>(II.getArgOperand(4));
    if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
        BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
      break;

    // With bound_ctrl set and full row/bank masks, the old value is never
    // read, so it can be replaced with undef.
    return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
  }
  case Intrinsic::amdgcn_permlane16:
  case Intrinsic::amdgcn_permlane16_var:
  case Intrinsic::amdgcn_permlanex16:
  case Intrinsic::amdgcn_permlanex16_var: {
    // Discard vdst_in if it is not going to be read.
    Value *VDstIn = II.getArgOperand(0);
    if (isa<UndefValue>(VDstIn))
      break;

    // The fetch-invalid operand is at index 4 for permlane16/permlanex16 and
    // index 3 for the _var variants; bound_ctrl follows it.
    unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
                          IID == Intrinsic::amdgcn_permlanex16)
                             ? 4
                             : 3;
    unsigned int BcIdx = FiIdx + 1;

    ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(FiIdx));
    ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(BcIdx));
    if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
      break;

    return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
  }
  case Intrinsic::amdgcn_permlane64:
  case Intrinsic::amdgcn_readfirstlane:
  case Intrinsic::amdgcn_readlane: {
    // If the data argument is uniform, these intrinsics return it unchanged.
    const Use &Src = II.getArgOperandUse(0);
    if (isTriviallyUniform(Src))
      return IC.replaceInstUsesWith(II, Src.get());

    if (IID == Intrinsic::amdgcn_readlane &&
        simplifyDemandedLaneMaskArg(IC, II, 1))
      return &II;

    return std::nullopt;
  }
  case Intrinsic::amdgcn_writelane: {
    if (simplifyDemandedLaneMaskArg(IC, II, 1))
      return &II;
    return std::nullopt;
  }
  case Intrinsic::amdgcn_trig_preop: {
    // The intrinsic is declared with name mangling, but the instruction only
    // exists for f64.
    if (!II.getType()->isDoubleTy())
      break;

    Value *Src = II.getArgOperand(0);
    Value *Segment = II.getArgOperand(1);
    if (isa<PoisonValue>(Src) || isa<PoisonValue>(Segment))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));

    if (isa<UndefValue>(Src)) {
      auto *QNaN = ConstantFP::get(
          II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
    if (!Csrc)
      break;

    if (II.isStrictFP())
      break;

    // A NaN source folds to its quieted form.
    const APFloat &Fsrc = Csrc->getValueAPF();
    if (Fsrc.isNaN()) {
      auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
      return IC.replaceInstUsesWith(II, Quieted);
    }

    const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
    if (!Cseg)
      break;

    // Each 5-bit segment selects a 53-bit window into the 2/pi bit pattern.
    unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue();
    unsigned Shift = SegmentVal * 53;
    // ...
    // 2.0/PI table.
    static const uint32_t TwoByPi[] = {
        0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
        0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
        0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
        0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
        0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
        0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
        // ...
    };
    // Out-of-bounds segments return 0 (hardware behavior).
    unsigned Idx = Shift >> 5;
    if (Idx + 2 >= std::size(TwoByPi)) {
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
    }

    unsigned BShift = Shift & 0x1f;
    // ... (assemble the 53-bit window Thi/Tlo from TwoByPi[Idx..Idx+2])
    if (BShift)
      Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
    // ...
    int Scale = -53 - Shift;
    // ... (scalbn the extracted bits into the final double result)
  }
  case Intrinsic::amdgcn_fmul_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    // The legacy behavior is that multiplying +/-0.0 by anything, even NaN
    // or infinity, gives +0.0.
    if (match(Op0, m_AnyZeroFP()) || match(Op1, m_AnyZeroFP()))
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
    // ... (otherwise try to simplify to a plain fmul)
    break;
  }
  case Intrinsic::amdgcn_fma_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Op2 = II.getArgOperand(2);
    // ... (a +/-0.0 multiplicand folds the multiply away, leaving 0.0 + Op2)
    // When the legacy special cases are provably absent, use a normal fma.
    if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
      II.setCalledOperand(Intrinsic::getOrInsertDeclaration(
          II.getModule(), Intrinsic::fma, II.getType()));
      return &II;
    }
    break;
  }
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private: {
    if (isa<UndefValue>(II.getArgOperand(0)))
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));

    if (isa<ConstantPointerNull>(II.getArgOperand(0)))
      return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
    break;
  }
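  // Shrink stores of vectors whose trailing components are zero (or, on
  // targets with default-component broadcast, repeat the first component),
  // using the shared demanded-elements helper defined after this function.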
  case Intrinsic::amdgcn_raw_buffer_store_format:
  case Intrinsic::amdgcn_struct_buffer_store_format:
  case Intrinsic::amdgcn_raw_tbuffer_store:
  case Intrinsic::amdgcn_struct_tbuffer_store:
  case Intrinsic::amdgcn_image_store_1d:
  case Intrinsic::amdgcn_image_store_1darray:
  case Intrinsic::amdgcn_image_store_2d:
  case Intrinsic::amdgcn_image_store_2darray:
  case Intrinsic::amdgcn_image_store_2darraymsaa:
  case Intrinsic::amdgcn_image_store_2dmsaa:
  case Intrinsic::amdgcn_image_store_3d:
  case Intrinsic::amdgcn_image_store_cube:
  case Intrinsic::amdgcn_image_store_mip_1d:
  case Intrinsic::amdgcn_image_store_mip_1darray:
  case Intrinsic::amdgcn_image_store_mip_2d:
  case Intrinsic::amdgcn_image_store_mip_2darray:
  case Intrinsic::amdgcn_image_store_mip_3d:
  case Intrinsic::amdgcn_image_store_mip_cube: {
    if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
      break;

    APInt DemandedElts;
    if (ST->hasDefaultComponentBroadcast())
      DemandedElts = defaultComponentBroadcast(II.getArgOperand(0));
    else if (ST->hasDefaultComponentZero())
      DemandedElts = trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
    else
      break;

    int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
    if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
                                              false)) {
      return IC.eraseInstFromFunction(II);
    }
    break;
  }
  case Intrinsic::amdgcn_prng_b32: {
    auto *Src = II.getArgOperand(0);
    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, Src);
    }
    return std::nullopt;
  }
  case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
  case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    uint64_t CBSZ = cast<ConstantInt>(II.getArgOperand(3))->getZExtValue();
    uint64_t BLGP = cast<ConstantInt>(II.getArgOperand(4))->getZExtValue();
    auto *Src0Ty = cast<FixedVectorType>(Src0->getType());
    auto *Src1Ty = cast<FixedVectorType>(Src1->getType());

    // Map the cbsz/blgp format immediates to the number of registers the
    // corresponding operand occupies.
    auto getFormatNumRegs = [](unsigned FormatVal) {
      switch (FormatVal) {
      // ... (fp8 formats use 8 registers, fp6 formats 6, fp4 formats 4)
      }
    };

    bool MadeChange = false;
    unsigned Src0NumElts = getFormatNumRegs(CBSZ);
    unsigned Src1NumElts = getFormatNumRegs(BLGP);

    // Depending on the used format, fewer registers are required, so shrink
    // the operand vector type.
    if (Src0Ty->getNumElements() > Src0NumElts) {
      // ...
      MadeChange = true;
    }
    if (Src1Ty->getNumElements() > Src1NumElts) {
      // ...
      MadeChange = true;
    }

    if (!MadeChange)
      return std::nullopt;
    // ... (rebuild the call with the shrunk operands)
  }
  }
  return std::nullopt;
}
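/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image
/// intrinsics: unused vector components are dropped from the load or store,
/// updating the buffer offset or image dmask accordingly.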
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                    IntrinsicInst &II,
                                                    APInt DemandedElts,
                                                    int DMaskIdx = -1,
                                                    bool IsLoad = true) {
  auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
                                             : II.getOperand(0)->getType());
  unsigned VWidth = IIVTy->getNumElements();
  // ...
  Type *EltTy = IIVTy->getElementType();

  // Assume the arguments are unchanged and later override them, if needed.
  SmallVector<Value *, 16> Args(II.args());

  if (DMaskIdx < 0) {
    // Buffer case: start from the demanded prefix of components, then trim
    // unused components at the front by bumping the offset.
    const unsigned ActiveBits = DemandedElts.getActiveBits();
    const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();
    DemandedElts = (1 << ActiveBits) - 1;
    if (UnusedComponentsAtFront > 0) {
      static const unsigned InvalidOffsetIdx = 0xf;

      unsigned OffsetIdx;
      switch (II.getIntrinsicID()) {
      case Intrinsic::amdgcn_raw_buffer_load:
      case Intrinsic::amdgcn_raw_ptr_buffer_load:
        OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_s_buffer_load:
        // A vec3 result would most likely be widened back to vec4 during
        // lowering, so do not bother trimming in that case.
        if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
          OffsetIdx = InvalidOffsetIdx;
        else
          OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_struct_buffer_load:
      case Intrinsic::amdgcn_struct_ptr_buffer_load:
        OffsetIdx = 2;
        break;
      default:
        OffsetIdx = InvalidOffsetIdx;
        break;
      }
      if (OffsetIdx != InvalidOffsetIdx) {
        // Clear the demanded front components and push the offset past them.
        DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
        auto *Offset = Args[OffsetIdx];
        unsigned SingleComponentSizeInBits =
            IC.getDataLayout().getTypeSizeInBits(EltTy);
        unsigned OffsetAdd =
            UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
        auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
        Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
      }
    }
  } else {
    // Image case: rewrite the dmask so it covers only the demanded
    // components.
    ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]);
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;
    // ...
    unsigned NewDMaskVal = 0;
    unsigned OrigLdStIdx = 0;
    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
      const unsigned Bit = 1 << SrcIdx;
      if (!!(DMaskVal & Bit)) {
        if (!!DemandedElts[OrigLdStIdx])
          NewDMaskVal |= Bit;
        OrigLdStIdx++;
      }
    }

    if (DMaskVal != NewDMaskVal)
      Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
  }
  unsigned NewNumElts = DemandedElts.popcount();
  // ...
  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (DMaskIdx >= 0)
      II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
    return nullptr;
  }

  // Shrink the overloaded return/value type to the demanded element count.
  SmallVector<Type *, 6> OverloadTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
    return nullptr;
  Type *NewTy =
      (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
  OverloadTys[0] = NewTy;
  if (!IsLoad) {
    SmallVector<int, 8> EltMask;
    for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
      if (DemandedElts[OrigStoreIdx])
        EltMask.push_back(OrigStoreIdx);

    // Extract or shuffle out the demanded components of the store value.
    if (NewNumElts == 1)
      Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
    else
      Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
  }
  // ... (NewCall is the rebuilt, narrowed intrinsic call)
  if (IsLoad) {
    if (NewNumElts == 1) {
      return IC.Builder.CreateInsertElement(PoisonValue::get(IIVTy), NewCall,
                                            DemandedElts.countr_zero());
    }

    // Scatter the loaded components back to their original positions.
    SmallVector<int, 8> EltMask;
    unsigned NewLoadIdx = 0;
    for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
      if (!!DemandedElts[OrigLoadIdx])
        EltMask.push_back(NewLoadIdx++);
      else
        EltMask.push_back(NewNumElts);
    }
    return IC.Builder.CreateShuffleVector(NewCall, EltMask);
  }

  return NewCall;
}
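// Route SimplifyDemandedVectorElts for buffer loads (no dmask) and image
// loads (dmask at operand 0) to the helper above.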
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::amdgcn_raw_buffer_load:
  case Intrinsic::amdgcn_raw_ptr_buffer_load:
  case Intrinsic::amdgcn_raw_buffer_load_format:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
  case Intrinsic::amdgcn_raw_tbuffer_load:
  case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
  case Intrinsic::amdgcn_s_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load:
  case Intrinsic::amdgcn_struct_ptr_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load_format:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
  case Intrinsic::amdgcn_struct_tbuffer_load:
  case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
    return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
  default: {
    if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
      return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
    }
    break;
  }
  }
  return std::nullopt;
}