#include "llvm/IR/IntrinsicsAMDGPU.h"
// ...

using namespace llvm;

#define DEBUG_TYPE "AMDGPUtti"

namespace {

struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};

#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
#include "InstCombineTables.inc"

} // end anonymous namespace
// Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
                           const APFloat &Src2) {
  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);

  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp0 == APFloat::cmpEqual)
    return maxnum(Src1, Src2);

  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
  if (Cmp1 == APFloat::cmpEqual)
    return maxnum(Src0, Src2);

  return maxnum(Src0, Src1);
}
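// Worked example (illustrative, not from the source): for
// fmed3AMDGCN(1.0, 5.0, 3.0), Max3 = 5.0 compares equal to Src1, so the
// result is maxnum(1.0, 3.0) = 3.0 -- the median of the three operands.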
// Check if a value can be converted to a 16-bit value without losing
// precision.
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
  Type *VTy = V.getType();
  // ...
  if (IsFloat) {
    if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
      // Narrowing the constant to half must not lose any information.
      APFloat FloatValue(ConstFloat->getValueAPF());
      bool LosesInfo = true;
      FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
                         &LosesInfo);
      return !LosesInfo;
    }
  } else {
    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
      // The constant must fit into 16 bits.
      APInt IntValue(ConstInt->getValue());
      return IntValue.getActiveBits() <= 16;
    }
  }
  // ...
}
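// Illustrative behavior (inferred from the checks above): a ConstantFP 0.5
// converts to half exactly, so LosesInfo stays false and narrowing is
// allowed, while 1.0e10 overflows half and is rejected. On the integer path,
// a ConstantInt 65535 has exactly 16 active bits and passes; 65536 fails.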
// Convert a value to 16 bits, either by stripping an existing widening cast
// or by inserting a narrowing one.
static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
  Type *VTy = V.getType();
  if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
    return cast<Instruction>(&V)->getOperand(0);
  if (VTy->isIntegerTy())
    return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
  if (VTy->isFloatingPointTy())
    return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));

  llvm_unreachable("Should never be called!");
}
/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates an intrinsic call with
/// the modified arguments, and replaces InstToReplace with it.
static std::optional<Instruction *> modifyIntrinsicCall(
    IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
    InstCombiner &IC,
    std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
        Func) {
  // ...
  if (isa<FPMathOperator>(NewCall))
    NewCall->copyFastMathFlags(&OldIntr);
  // ...
  // Erase the old intrinsic unless it is the very instruction being replaced.
  bool RemoveOldIntr = &OldIntr != &InstToReplace;
  // ...
}
static std::optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                             const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                             IntrinsicInst &II, InstCombiner &IC) {
  // Optimize _L to _LZ when _L is zero.
  if (const auto *LZMappingInfo =
          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantLod =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->LodIndex);
            });
      }
    }
  }
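  // Illustrative IR for the fold above (hand-written, not from the source):
  //   %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(
  //            i32 15, float %s, float %t, float 0.0, ...)
  // becomes the lod-less variant with the lod argument dropped:
  //   %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(
  //            i32 15, float %s, float %t, ...)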
  // Optimize _mip away when 'lod' is zero.
  if (const auto *MIPMappingInfo =
          AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantMip =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
      if (ConstantMip->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->MipIndex);
            });
      }
    }
  }
  // Optimize _bias away when 'bias' is zero.
  if (const auto *BiasMappingInfo =
          AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantBias =
            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
      if (ConstantBias->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
                                                     ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
              ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
            });
      }
    }
  }
  // Optimize _offset away when 'offset' is zero.
  if (const auto *OffsetMappingInfo =
          AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
    if (auto *ConstantOffset =
            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
      if (ConstantOffset->isZero()) {
        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
            AMDGPU::getImageDimIntrinsicByBaseOpcode(
                OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
        return modifyIntrinsicCall(
            II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
              Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
            });
      }
    }
  }
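  // The same shape of rewrite applies here (illustrative): an image sample
  // variant taking an i32 0 offset argument is replaced by the corresponding
  // no-offset opcode, with the offset operand erased from the argument list.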
  // Try to use D16.
  if (ST->hasD16Images()) {
    // ...
    // If the only use of the image intrinsic is an fptrunc to half, fold the
    // truncation into the intrinsic by giving it a D16 return type.
    if (II.hasOneUse()) {
      Instruction *User = II.user_back();
      if (User->getOpcode() == Instruction::FPTrunc &&
          User->getType()->isHalfTy()) {
        return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
                                   [&](auto &Args, auto &ArgTys) {
                                     // Change the return type of the image
                                     // intrinsic.
                                     ArgTys[0] = User->getType();
                                   });
      }
    }
  }
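  // Illustrative IR for the D16 fold above (hand-written):
  //   %v = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, ...)
  //   %h = fptrunc float %v to half
  // collapses into a single half-returning sample:
  //   %h = call half @llvm.amdgcn.image.sample.2d.f16.f32(i32 1, ...)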
  // Try to use A16 or G16.
  if (!ST->hasA16() && !ST->hasG16())
    return std::nullopt;

  // Addresses are interpreted as float if the instruction has a sampler or as
  // unsigned int if there is no sampler.
  bool HasSampler =
      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
  bool FloatCoord = false;
  // true means only derivatives can be converted to 16 bit, coordinates not.
  bool OnlyDerivatives = false;

  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
       OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
    Value *Coord = II.getOperand(OperandIndex);
    // If the values are not derived from 16-bit values, we cannot optimize.
    if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
      if (OperandIndex < ImageDimIntr->CoordStart ||
          ImageDimIntr->GradientStart == ImageDimIntr->CoordStart)
        return std::nullopt;
      // All gradients can be converted, so convert only them.
      OnlyDerivatives = true;
      break;
    }

    assert(OperandIndex == ImageDimIntr->GradientStart ||
           FloatCoord == Coord->getType()->isFloatingPointTy());
    FloatCoord = Coord->getType()->isFloatingPointTy();
  }

  if (!OnlyDerivatives && !ST->hasA16())
    OnlyDerivatives = true; // Only supports G16

  // Check if there is a bias parameter and if it can be converted to f16.
  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
    assert(HasSampler &&
           "Only image instructions with a sampler can have a bias");
    if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
      OnlyDerivatives = true;
  }

  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
                                               ImageDimIntr->CoordStart))
    return std::nullopt;

  Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                               : Type::getInt16Ty(II.getContext());

  return modifyIntrinsicCall(
      II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
        ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
        if (!OnlyDerivatives) {
          ArgTys[ImageDimIntr->CoordTyArg] = CoordType;

          // Change the bias type.
          if (ImageDimIntr->NumBiasArgs != 0)
            ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
        }

        unsigned EndIndex =
            OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
        for (unsigned OperandIndex = ImageDimIntr->GradientStart;
             OperandIndex < EndIndex; OperandIndex++)
          Args[OperandIndex] =
              convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);

        // Convert the bias.
        if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
          Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
          Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
        }
      });
}
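// Illustrative effect of the A16 rewrite above (hand-written IR): a sample
// whose coordinates were fpext'd from half,
//   %s32 = fpext half %s to float
//   %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(..., float %s32, ...)
// is rewritten to take the half coordinates directly:
//   %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(..., half %s, ...)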
// Trim trailing components of the vector that are known to be zero or undef,
// returning the set of elements that must remain demanded.
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
                                       Instruction *I) {
  auto *VTy = cast<FixedVectorType>(UseV->getType());
  unsigned VWidth = VTy->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);

  for (int i = VWidth - 1; i > 0; --i) {
    auto *Elt = findScalarElement(UseV, i);
    if (!Elt)
      break;

    if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
      if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
        break;
    } else {
      break;
    }

    DemandedElts.clearBit(i);
  }

  return DemandedElts;
}
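// Worked example (illustrative): for a store of
//   <4 x float> <float %x, float %y, float 0.0, float 0.0>
// the loop clears bits 3 and 2 and stops at the non-constant %y, leaving
// DemandedElts = 0b0011: only the first two components stay live.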
// Trim elements that are copies of the first component (the hardware can
// broadcast it), returning the set of demanded elements.
static APInt defaultComponentBroadcast(Value *V) {
  auto *VTy = cast<FixedVectorType>(V->getType());
  unsigned VWidth = VTy->getNumElements();
  APInt DemandedElts = APInt::getAllOnes(VWidth);
  Value *FirstComponent = findScalarElement(V, 0);

  SmallVector<int> ShuffleMask;
  if (auto *SVI = dyn_cast<ShuffleVectorInst>(V))
    SVI->getShuffleMask(ShuffleMask);

  for (int I = VWidth - 1; I > 0; --I) {
    if (ShuffleMask.empty()) {
      auto *Elt = findScalarElement(V, I);
      if (!Elt || (Elt != FirstComponent && !isa<UndefValue>(Elt)))
        break;
    } else {
      // Detect identical elements through the shuffle mask, which
      // findScalarElement cannot see.
      if (ShuffleMask[I] != ShuffleMask[0] && ShuffleMask[I] != PoisonMaskElem)
        break;
    }
    DemandedElts.clearBit(I);
  }

  return DemandedElts;
}
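// Worked example (illustrative): for a store of the splat
//   <4 x float> <float %x, float %x, float %x, float %x>
// every trailing element equals the first component, so bits 3..1 are
// cleared and DemandedElts = 0b0001: only lane 0 must actually be stored.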
std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::amdgcn_rcp: {
    Value *Src = II.getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    if (II.isStrictFP())
      break;

    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1);
      Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
      return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
    }

    FastMathFlags FMF = cast<FPMathOperator>(II).getFastMathFlags();
    if (!FMF.allowContract())
      break;
    auto *SrcCI = dyn_cast<IntrinsicInst>(Src);
    if (!SrcCI)
      break;

    auto IID = SrcCI->getIntrinsicID();
    // llvm.amdgcn.rcp(llvm.amdgcn.sqrt(x)) -> llvm.amdgcn.rsq(x) if
    // contractable.
    if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
      // ... (check the inner sqrt is contractable and single-use)
      Function *NewDecl = Intrinsic::getDeclaration(
          SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
      // ...
      II.setFastMathFlags(InnerFMF);
      II.setCalledFunction(NewDecl);
      return IC.replaceOperand(II, 0, SrcCI->getArgOperand(0));
    }
    break;
  }
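  // Illustrative folds for the rcp case above (hand-written):
  //   call float @llvm.amdgcn.rcp.f32(float 2.0)   --> float 0.5
  // and, when both calls allow contraction,
  //   %s = call contract float @llvm.amdgcn.sqrt.f32(float %x)
  //   %r = call contract float @llvm.amdgcn.rcp.f32(float %s)
  // -->
  //   %r = call contract float @llvm.amdgcn.rsq.f32(float %x)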
  case Intrinsic::amdgcn_sqrt:
  case Intrinsic::amdgcn_rsq: {
    Value *Src = II.getArgOperand(0);

    // TODO: Move to ConstantFolding/InstSimplify?
    if (isa<UndefValue>(Src)) {
      Type *Ty = II.getType();
      auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    // f16 amdgcn.sqrt is identical to regular sqrt.
    if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
      Function *NewDecl = Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::sqrt, {II.getType()});
      II.setCalledFunction(NewDecl);
      return &II;
    }
    break;
  }
  case Intrinsic::amdgcn_log:
  case Intrinsic::amdgcn_exp2: {
    const bool IsLog = IID == Intrinsic::amdgcn_log;
    const bool IsExp = IID == Intrinsic::amdgcn_exp2;
    Value *Src = II.getArgOperand(0);
    Type *Ty = II.getType();

    if (isa<PoisonValue>(Src))
      return IC.replaceInstUsesWith(II, Src);
    // ...
    if (ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      if (C->isInfinity()) {
        // exp2(+inf) -> +inf, log2(+inf) -> +inf
        if (!C->isNegative())
          return IC.replaceInstUsesWith(II, C);

        // exp2(-inf) -> 0
        if (IsExp && C->isNegative())
          return IC.replaceInstUsesWith(II, ConstantFP::getZero(Ty));
      }

      if (II.isStrictFP())
        break;

      if (C->isNaN()) {
        Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
        return IC.replaceInstUsesWith(II, Quieted);
      }

      // The f32 instruction flushes denormal inputs; f16 handles them.
      if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
        Constant *FoldedValue = IsLog ? ConstantFP::getInfinity(Ty, true)
                                      : ConstantFP::get(Ty, 1.0);
        return IC.replaceInstUsesWith(II, FoldedValue);
      }

      if (IsLog && C->isNegative())
        return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
      // ...
    }
    break;
  }
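  // Summary of the constant folds above (illustrative):
  //   log2(+inf) -> +inf          exp2(+inf) -> +inf
  //   exp2(-inf) -> 0.0           log2(f32 0.0 or denormal) -> -inf
  //   exp2(f32 0.0 or denormal) -> 1.0
  //   log2(negative) -> NaN       signaling NaN input -> quieted NaN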
  case Intrinsic::amdgcn_frexp_mant:
  case Intrinsic::amdgcn_frexp_exp: {
    Value *Src = II.getArgOperand(0);
    if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
      int Exp;
      APFloat Significand =
          frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);

      if (IID == Intrinsic::amdgcn_frexp_mant) {
        return IC.replaceInstUsesWith(
            II, ConstantFP::get(II.getContext(), Significand));
      }

      // ... (match the instruction's special cases for inf/nan, then fold
      // frexp_exp to the constant exponent)
    }

    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }
    break;
  }
  case Intrinsic::amdgcn_class: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    if (CMask) {
      // Rewrite to the generic llvm.is.fpclass intrinsic.
      II.setCalledOperand(Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::is_fpclass, Src0->getType()));

      // Clamp any excess bits, as they're illegal for the generic intrinsic.
      II.setArgOperand(1, ConstantInt::get(Src1->getType(),
                                           CMask->getZExtValue() & fcAllFlags));
      return &II;
    }

    // Propagate poison.
    if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
    // ...
    break;
  }
  case Intrinsic::amdgcn_cvt_pkrtz: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        const fltSemantics &HalfSem =
            II.getType()->getScalarType()->getFltSemantics();
        bool LosesInfo;
        APFloat Val0 = C0->getValueAPF();
        APFloat Val1 = C1->getValueAPF();
        Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
        Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);

        Constant *Folded =
            ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
                                 ConstantFP::get(II.getContext(), Val1)});
        return IC.replaceInstUsesWith(II, Folded);
      }
    }

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }
    break;
  }
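  // Illustrative fold: cvt.pkrtz(1.0, 2.0) becomes the constant
  //   <2 x half> <half 1.0, half 2.0>
  // with each value converted using round-toward-zero, matching the
  // instruction's semantics.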
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    }
    break;
  }
  case Intrinsic::amdgcn_ubfe:
  case Intrinsic::amdgcn_sbfe: {
    // Decompose simple cases into standard shifts.
    Value *Src = II.getArgOperand(0);
    if (isa<UndefValue>(Src)) {
      return IC.replaceInstUsesWith(II, Src);
    }

    unsigned Width;
    Type *Ty = II.getType();
    unsigned IntSize = Ty->getIntegerBitWidth();

    ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CWidth) {
      Width = CWidth->getZExtValue();
      if ((Width & (IntSize - 1)) == 0) {
        return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
      }

      // Hardware ignores high bits, so remove those.
      if (Width >= IntSize) {
        return IC.replaceOperand(
            II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
      }
    }

    unsigned Offset;
    ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
    if (COffset) {
      Offset = COffset->getZExtValue();
      if (Offset >= IntSize) {
        return IC.replaceOperand(
            II, 1,
            ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
      }
    }

    bool Signed = IID == Intrinsic::amdgcn_sbfe;

    if (!CWidth || !COffset)
      break;

    // ...
    if (Offset + Width < IntSize) {
      Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
      Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
                                 : IC.Builder.CreateLShr(Shl, IntSize - Width);
      RightShift->takeName(&II);
      return IC.replaceInstUsesWith(II, RightShift);
    }

    Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
                               : IC.Builder.CreateLShr(Src, Offset);
    RightShift->takeName(&II);
    return IC.replaceInstUsesWith(II, RightShift);
  }
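  // Worked example (illustrative) for i32 ubfe with offset 8, width 8:
  //   %shl = shl i32 %src, 16    ; 32 - 8 - 8
  //   %res = lshr i32 %shl, 24   ; 32 - 8, zero-fills the high bits
  // which extracts bits [15:8]; sbfe uses ashr instead to sign-extend.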
  case Intrinsic::amdgcn_exp:
  case Intrinsic::amdgcn_exp_row:
  case Intrinsic::amdgcn_exp_compr: {
    ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
    unsigned EnBits = En->getZExtValue();
    if (EnBits == 0xf)
      break; // All inputs enabled.

    bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
    bool Changed = false;

    // Replace disabled source arguments with undef.
    for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
      if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
          (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
        Value *Src = II.getArgOperand(I + 2);
        if (!isa<UndefValue>(Src)) {
          IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
          Changed = true;
        }
      }
    }

    if (Changed) {
      return &II;
    }
    break;
  }
  case Intrinsic::amdgcn_fmed3: {
    // Note this does not preserve proper sNaN behavior if IEEE-mode is
    // enabled for the shader.

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);
    Value *Src2 = II.getArgOperand(2);

    // If one operand is a NaN (or undef), fmed3 reduces to a two-operand
    // min/max of the other two. Checking for NaN before canonicalization
    // keeps the operand order unchanged.
    Value *V = nullptr;
    if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0))
      V = IC.Builder.CreateMinNum(Src1, Src2);
    // ... (likewise for Src1 and Src2)
    if (V) {
      if (auto *CI = dyn_cast<CallInst>(V)) {
        CI->copyFastMathFlags(&II);
        CI->takeName(&II);
      }
      return IC.replaceInstUsesWith(II, V);
    }

    bool Swap = false;
    // Canonicalize constants to RHS operands:
    // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }
    if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
      std::swap(Src1, Src2);
      Swap = true;
    }
    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
      std::swap(Src0, Src1);
      Swap = true;
    }
    if (Swap) {
      II.setArgOperand(0, Src0);
      II.setArgOperand(1, Src1);
      II.setArgOperand(2, Src2);
      return &II;
    }

    if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
      if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
          APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
                                       C2->getValueAPF());
          return IC.replaceInstUsesWith(
              II, ConstantFP::get(IC.Builder.getContext(), Result));
        }
      }
    }

    // ...
    // fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z))
    Value *X, *Y, *Z;
    if (matchFPExtFromF16(Src0, X) && matchFPExtFromF16(Src1, Y) &&
        matchFPExtFromF16(Src2, Z)) {
      Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()},
                                                  {X, Y, Z}, &II, II.getName());
      return new FPExtInst(NewCall, II.getType());
    }

    break;
  }
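  // Illustrative folds for fmed3 (restating the branches above): if Src1 is
  // NaN, the call becomes minnum(Src0, Src2); three constant operands fold
  // via fmed3AMDGCN; and with f16 med3 support, fmed3 of three fpext'd halves
  // sinks to an f16 fmed3 followed by a single fpext.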
  case Intrinsic::amdgcn_icmp:
  case Intrinsic::amdgcn_fcmp: {
    const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
    // Guard against invalid arguments.
    int64_t CCVal = CC->getZExtValue();
    bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    // ... (bail out on out-of-range predicate values)

    Value *Src0 = II.getArgOperand(0);
    Value *Src1 = II.getArgOperand(1);

    if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
      if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
        // Fold a compare of two constants. A comparison that always returns
        // true is the same as a read of the EXEC register (the result is one
        // bit per thread, masked with EXEC).
        // ...
        Function *NewF = Intrinsic::getDeclaration(
            II.getModule(), Intrinsic::read_register, II.getType());
        Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
        MDNode *MD = MDNode::get(II.getContext(), MDArgs);
        Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
        CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
        NewCall->addFnAttr(Attribute::Convergent);
        NewCall->takeName(&II);
        return IC.replaceInstUsesWith(II, NewCall);
      }

      // Canonicalize constants to RHS.
      CmpInst::Predicate SwapPred =
          CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
      II.setArgOperand(0, Src1);
      II.setArgOperand(1, Src0);
      II.setArgOperand(
          2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
      return &II;
    }

    // ... (canonicalize compares of extended i1 values to compares with zero)

    // Replace (amdgcn.icmp (cmp x, y), 0, ne) with an amdgcn.icmp or
    // amdgcn.fcmp directly on x and y.
    CmpInst::Predicate SrcPred;
    Value *SrcLHS;
    Value *SrcRHS;
    if (match(Src0,
              m_OneUse(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
      if (CCVal == CmpInst::ICMP_EQ)
        SrcPred = CmpInst::getInversePredicate(SrcPred);

      Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
                                 ? Intrinsic::amdgcn_fcmp
                                 : Intrinsic::amdgcn_icmp;

      Type *Ty = SrcLHS->getType();
      if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
        // Promote to the next legal integer type.
        unsigned Width = CmpType->getBitWidth();
        unsigned NewWidth = Width;
        if (Width <= 16)
          NewWidth = 16;
        else if (Width <= 32)
          NewWidth = 32;
        else if (Width <= 64)
          NewWidth = 64;
        else
          break; // Can't handle this.

        if (Width != NewWidth) {
          // ... (sext or zext SrcLHS/SrcRHS to the promoted width)
        }
      }
      // ...
      Function *NewF = Intrinsic::getDeclaration(
          II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
      Value *Args[] = {SrcLHS, SrcRHS,
                       ConstantInt::get(CC->getType(), SrcPred)};
      CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
      NewCall->takeName(&II);
      return IC.replaceInstUsesWith(II, NewCall);
    }

    break;
  }
  case Intrinsic::amdgcn_mbcnt_hi: {
    // exec_hi is all 0, so this is just a copy on wave32.
    if (ST->isWave32())
      return IC.replaceInstUsesWith(II, II.getArgOperand(1));
    break;
  }
  case Intrinsic::amdgcn_ballot: {
    if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      if (Src->isZero()) {
        // amdgcn.ballot(i1 0) is zero.
        return IC.replaceInstUsesWith(II,
                                      Constant::getNullValue(II.getType()));
      }
    }
    if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
      // Shrink a 64-bit ballot on wave32 targets to a 32-bit ballot plus a
      // zero extension.
      Value *Call = IC.Builder.CreateZExt(
          IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
                                     {IC.Builder.getInt32Ty()},
                                     {II.getArgOperand(0)}),
          II.getType());
      Call->takeName(&II);
      return IC.replaceInstUsesWith(II, Call);
    }
    break;
  }
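  // Illustrative IR for the wave32 rewrite above (hand-written):
  //   %b64 = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
  // -->
  //   %b32 = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
  //   %b64 = zext i32 %b32 to i64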
  case Intrinsic::amdgcn_wqm_vote: {
    // wqm_vote is identity when the argument is constant.
    if (!isa<Constant>(II.getArgOperand(0)))
      break;
    return IC.replaceInstUsesWith(II, II.getArgOperand(0));
  }
  case Intrinsic::amdgcn_kill: {
    const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
    if (!C || !C->getZExtValue())
      break;

    // amdgcn.kill(i1 1) is a no-op.
    return IC.eraseInstFromFunction(II);
  }
  case Intrinsic::amdgcn_update_dpp: {
    Value *Old = II.getArgOperand(0);

    auto *BC = cast<ConstantInt>(II.getArgOperand(5));
    auto *RM = cast<ConstantInt>(II.getArgOperand(3));
    auto *BM = cast<ConstantInt>(II.getArgOperand(4));
    if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
        BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
      break;

    // If bound_ctrl = 1 and row/bank masks are 0xf, the old value is unused.
    return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
  }
  case Intrinsic::amdgcn_permlane16:
  case Intrinsic::amdgcn_permlane16_var:
  case Intrinsic::amdgcn_permlanex16:
  case Intrinsic::amdgcn_permlanex16_var: {
    // Discard vdst_in if it's not going to be read.
    Value *VDstIn = II.getArgOperand(0);
    if (isa<UndefValue>(VDstIn))
      break;

    // FetchInvalid operand idx.
    unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
                          IID == Intrinsic::amdgcn_permlanex16)
                             ? 4  /* for permlane16 and permlanex16 */
                             : 3; /* for permlane16_var and permlanex16_var */

    // BoundCtrl operand idx: 5 for permlane16 and permlanex16, 4 for the
    // _var variants.
    unsigned int BcIdx = FiIdx + 1;

    ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(FiIdx));
    ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(BcIdx));
    if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
      break;

    return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
  }
  case Intrinsic::amdgcn_permlane64:
    // A constant value is trivially uniform.
    if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
      return IC.replaceInstUsesWith(II, C);
    }
    break;
  case Intrinsic::amdgcn_readfirstlane:
  case Intrinsic::amdgcn_readlane: {
    // A constant value is trivially uniform.
    if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
      return IC.replaceInstUsesWith(II, C);
    }

    // The rest of these may not be safe if the exec may not be the same
    // between the def and use.
    Value *Src = II.getArgOperand(0);
    Instruction *SrcInst = dyn_cast<Instruction>(Src);
    if (SrcInst && SrcInst->getParent() != II.getParent())
      break;

    // readfirstlane (readfirstlane x) -> readfirstlane x
    // readlane (readfirstlane x), y -> readfirstlane x
    if (match(Src,
              PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
      return IC.replaceInstUsesWith(II, Src);
    }

    if (IID == Intrinsic::amdgcn_readfirstlane) {
      // readfirstlane (readlane x, y) -> readlane x, y
      if (match(Src,
                PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
        return IC.replaceInstUsesWith(II, Src);
      }
    } else {
      // readlane (readlane x, y), y -> readlane x, y
      if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
                         PatternMatch::m_Value(),
                         PatternMatch::m_Specific(II.getArgOperand(1))))) {
        return IC.replaceInstUsesWith(II, Src);
      }
    }
    break;
  }
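  // Summary of the lane-read folds above (illustrative):
  //   readfirstlane(readfirstlane x)   -> readfirstlane x
  //   readlane(readfirstlane x, y)     -> readfirstlane x
  //   readfirstlane(readlane x, y)     -> readlane x, y
  //   readlane(readlane x, y), y       -> readlane x, y
  // All require the inner call to sit in the same block, since EXEC may
  // differ between blocks.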
  case Intrinsic::amdgcn_trig_preop: {
    // The intrinsic is declared with name mangling, but currently the
    // instruction only exists for f64.
    if (!II.getType()->isDoubleTy())
      break;

    Value *Src = II.getArgOperand(0);
    Value *Segment = II.getArgOperand(1);
    if (isa<PoisonValue>(Src) || isa<PoisonValue>(Segment))
      return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));

    if (isa<UndefValue>(Src)) {
      auto *QNaN = ConstantFP::get(
          II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics()));
      return IC.replaceInstUsesWith(II, QNaN);
    }

    const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
    if (!Csrc)
      break;

    if (II.isStrictFP())
      break;

    const APFloat &Fsrc = Csrc->getValueAPF();
    if (Fsrc.isNaN()) {
      auto *Quieted = ConstantFP::get(II.getType(), Fsrc.makeQuiet());
      return IC.replaceInstUsesWith(II, Quieted);
    }

    const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
    if (!Cseg)
      break;

    // Select a 53-bit window into the 2.0/PI table, biased by the source
    // exponent.
    unsigned Exponent = (Fsrc.bitcastToAPInt().getZExtValue() >> 52) & 0x7ff;
    unsigned SegmentVal = Cseg->getValue().trunc(5).getZExtValue();
    unsigned Shift = SegmentVal * 53;
    if (Exponent > 1077)
      Shift += Exponent - 1077;

    // 2.0/PI table.
    static const uint32_t TwoByPi[] = {
        0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
        0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
        0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
        0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
        0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
        0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
        /* ... table continues ... */};

    // Return 0 for an out-of-bounds segment (hardware behavior).
    unsigned Idx = Shift >> 5;
    if (Idx + 2 >= std::size(TwoByPi)) {
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
    }

    unsigned BShift = Shift & 0x1f;
    uint64_t Thi = Make_64(TwoByPi[Idx], TwoByPi[Idx + 1]);
    uint64_t Tlo = Make_64(TwoByPi[Idx + 2], 0);
    if (BShift)
      Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
    Thi = Thi >> 11;
    APFloat Result = APFloat((double)Thi);

    int Scale = -53 - Shift;
    if (Exponent >= 1968)
      Scale += 128;

    Result = scalbn(Result, Scale, RoundingMode::NearestTiesToEven);
    return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Result));
  }
  case Intrinsic::amdgcn_fmul_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);

    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN
    // or infinity, gives +0.0.
    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
        match(Op1, PatternMatch::m_AnyZeroFP()))
      return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));

    // If we can prove we don't have one of the special cases then we can use
    // a normal fmul instruction instead.
    if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
      auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
      FMul->takeName(&II);
      return IC.replaceInstUsesWith(II, FMul);
    }
    break;
  }
  case Intrinsic::amdgcn_fma_legacy: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Op2 = II.getArgOperand(2);

    // ... (fold multiplies by +/-0.0 to an fadd of Op2)

    // If we can prove we don't have one of the special cases then we can use
    // a normal fma instead.
    if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
      II.setCalledOperand(Intrinsic::getDeclaration(
          II.getModule(), Intrinsic::fma, II.getType()));
      return &II;
    }
    break;
  }
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private: {
    if (isa<UndefValue>(II.getArgOperand(0)))
      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));

    if (isa<ConstantPointerNull>(II.getArgOperand(0)))
      return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
    break;
  }
  case Intrinsic::amdgcn_raw_buffer_store_format:
  case Intrinsic::amdgcn_struct_buffer_store_format:
  case Intrinsic::amdgcn_raw_tbuffer_store:
  case Intrinsic::amdgcn_struct_tbuffer_store:
  case Intrinsic::amdgcn_image_store_1d:
  case Intrinsic::amdgcn_image_store_1darray:
  case Intrinsic::amdgcn_image_store_2d:
  case Intrinsic::amdgcn_image_store_2darray:
  case Intrinsic::amdgcn_image_store_2darraymsaa:
  case Intrinsic::amdgcn_image_store_2dmsaa:
  case Intrinsic::amdgcn_image_store_3d:
  case Intrinsic::amdgcn_image_store_cube:
  case Intrinsic::amdgcn_image_store_mip_1d:
  case Intrinsic::amdgcn_image_store_mip_1darray:
  case Intrinsic::amdgcn_image_store_mip_2d:
  case Intrinsic::amdgcn_image_store_mip_2darray:
  case Intrinsic::amdgcn_image_store_mip_3d:
  case Intrinsic::amdgcn_image_store_mip_cube: {
    if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
      break;

    APInt DemandedElts;
    if (ST->hasDefaultComponentBroadcast())
      DemandedElts = defaultComponentBroadcast(II.getArgOperand(0));
    else if (ST->hasDefaultComponentZero())
      DemandedElts = trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
    else
      break;

    int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
    if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
                                              false)) {
      return IC.eraseInstFromFunction(II);
    }

    break;
  }
  default: {
    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
            AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
      return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
    }
  }
  }
  return std::nullopt;
}
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image
/// intrinsics.
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
                                                    IntrinsicInst &II,
                                                    APInt DemandedElts,
                                                    int DMaskIdx,
                                                    bool IsLoad) {
  auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
                                             : II.getOperand(0)->getType());
  unsigned VWidth = IIVTy->getNumElements();
  if (VWidth == 1)
    return nullptr;
  Type *EltTy = IIVTy->getElementType();

  IRBuilderBase::InsertPointGuard Guard(IC.Builder);
  IC.Builder.SetInsertPoint(&II);

  // Assume the arguments are unchanged and later override them, if needed.
  SmallVector<Value *, 16> Args(II.args());

  if (DMaskIdx < 0) {
    // Buffer case.
    const unsigned ActiveBits = DemandedElts.getActiveBits();
    const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();

    // Start assuming the prefix of elements is demanded, but possibly clear
    // some other bits if there are leading unused components and update the
    // offset accordingly.
    DemandedElts = (1 << ActiveBits) - 1;

    if (UnusedComponentsAtFront > 0) {
      static const unsigned InvalidOffsetIdx = 0xf;

      unsigned OffsetIdx;
      switch (II.getIntrinsicID()) {
      case Intrinsic::amdgcn_raw_buffer_load:
      case Intrinsic::amdgcn_raw_ptr_buffer_load:
        OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_s_buffer_load:
        // If the resulting type is vec3, there is no point in trimming the
        // load with an updated offset, as the vec3 would most likely be
        // widened to vec4 anyway during lowering.
        if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
          OffsetIdx = InvalidOffsetIdx;
        else
          OffsetIdx = 1;
        break;
      case Intrinsic::amdgcn_struct_buffer_load:
      case Intrinsic::amdgcn_struct_ptr_buffer_load:
        OffsetIdx = 2;
        break;
      default:
        // TODO: handle tbuffer* intrinsics.
        OffsetIdx = InvalidOffsetIdx;
        break;
      }

      if (OffsetIdx != InvalidOffsetIdx) {
        // Clear demanded bits and update the offset.
        DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
        auto *Offset = Args[OffsetIdx];
        unsigned SingleComponentSizeInBits =
            IC.getDataLayout().getTypeSizeInBits(EltTy);
        unsigned OffsetAdd =
            UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
        auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
        Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
      }
    }
  } else {
    // Image case.
    ConstantInt *DMask = cast<ConstantInt>(Args[DMaskIdx]);
    unsigned DMaskVal = DMask->getZExtValue() & 0xf;

    // dmask 0 has special semantics, do not simplify.
    if (DMaskVal == 0)
      return nullptr;

    // Mask off values that are undefined because the dmask doesn't cover them.
    DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;

    unsigned NewDMaskVal = 0;
    unsigned OrigLdStIdx = 0;
    for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
      const unsigned Bit = 1 << SrcIdx;
      if (!!(DMaskVal & Bit)) {
        if (!!DemandedElts[OrigLdStIdx])
          NewDMaskVal |= Bit;
        OrigLdStIdx++;
      }
    }

    if (DMaskVal != NewDMaskVal)
      Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
  }

  unsigned NewNumElts = DemandedElts.popcount();
  if (!NewNumElts)
    return PoisonValue::get(IIVTy);

  if (NewNumElts >= VWidth && DemandedElts.isMask()) {
    if (DMaskIdx >= 0)
      II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
    return nullptr;
  }

  // Validate function argument and return types, extracting overloaded types
  // along the way.
  SmallVector<Type *, 6> OverloadTys;
  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
    return nullptr;

  Type *NewTy =
      (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
  OverloadTys[0] = NewTy;

  if (!IsLoad) {
    // Shrink the stored vector to just the demanded components.
    SmallVector<int, 8> EltMask;
    for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
      if (DemandedElts[OrigStoreIdx])
        EltMask.push_back(OrigStoreIdx);

    if (NewNumElts == 1)
      Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
    else
      Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
  }

  Function *NewIntrin = Intrinsic::getDeclaration(
      II.getModule(), II.getIntrinsicID(), OverloadTys);
  CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
  NewCall->takeName(&II);
  NewCall->copyMetadata(II);

  if (IsLoad) {
    if (NewNumElts == 1) {
      return IC.Builder.CreateInsertElement(PoisonValue::get(IIVTy), NewCall,
                                            DemandedElts.countr_zero());
    }

    // Scatter the trimmed components back to their original lanes.
    SmallVector<int, 8> EltMask;
    unsigned NewLoadIdx = 0;
    for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
      if (!!DemandedElts[OrigLoadIdx])
        EltMask.push_back(NewLoadIdx++);
      else
        EltMask.push_back(NewNumElts);
    }

    return IC.Builder.CreateShuffleVector(NewCall, EltMask);
  }

  return NewCall;
}
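// Worked example (illustrative): if only the first component of
//   %v = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(...)
// is demanded, the call above is shrunk to
//   %s = call float @llvm.amdgcn.raw.buffer.load.f32(...)
// and uses are rewired through an insertelement into lane 0.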
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const {
  switch (II.getIntrinsicID()) {
  case Intrinsic::amdgcn_raw_buffer_load:
  case Intrinsic::amdgcn_raw_ptr_buffer_load:
  case Intrinsic::amdgcn_raw_buffer_load_format:
  case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
  case Intrinsic::amdgcn_raw_tbuffer_load:
  case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
  case Intrinsic::amdgcn_s_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load:
  case Intrinsic::amdgcn_struct_ptr_buffer_load:
  case Intrinsic::amdgcn_struct_buffer_load_format:
  case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
  case Intrinsic::amdgcn_struct_tbuffer_load:
  case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
    return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
  default: {
    if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
      // Image loads: the dmask operand is at index 0.
      return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
    }
    break;
  }
  }
  return std::nullopt;
}