#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86tti"
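// getNegativeIsTrueBoolVec: return a constant boolean vector that is true in
// every position where the input constant data vector has its sign bit set.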
  VectorType *IntTy = VectorType::getInteger(cast<VectorType>(V->getType()));
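// getBoolVecFromMask: convert an x86 XMM integer vector mask to a vector of
// bools, keyed on each element's most significant bit.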
  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))
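// simplifyX86MaskedLoad: an all-zero mask selects no lanes, and a usable mask
// lets the call be rewritten as a generic llvm.masked.load.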
  if (isa<ConstantAggregateZero>(Mask))

  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
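// simplifyX86MaskedStore: the store-side counterpart, rewritten as a generic
// llvm.masked.store when the mask can be converted.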
  if (isa<ConstantAggregateZero>(Mask)) {

  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
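// simplifyX86immShift: handle the SSE/AVX shifts whose count is an immediate
// or a scalar held in the low 64 bits of a vector operand, lowering them to
// plain IR shl/lshr/ashr when the count is known.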
  bool LogicalShift = false;
  bool ShiftLeft = false;

  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;

  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:

  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  unsigned VWidth = VT->getNumElements();

    Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
    Amt = Builder.CreateVectorSplat(VWidth, Amt);
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));

    return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));

           cast<VectorType>(AmtVT)->getElementType() == SVT &&
           "Unexpected shift-by-scalar type");
    unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();

    Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));

  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");
    for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
      unsigned SubEltIdx = (NumSubElts - 1) - i;
      auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));

  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
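// simplifyX86varShift: the AVX2/AVX-512 variable shifts (psllv/psrlv/psrav)
// shift each element by its own count; with an in-range or constant count
// vector they lower to a single IR shl/lshr/ashr.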
  bool LogicalShift = false;
  bool ShiftLeft = false;

  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
    LogicalShift = false;

  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:

  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

  auto *VT = cast<FixedVectorType>(II.getType());
  Type *SVT = VT->getElementType();
  int NumElts = VT->getNumElements();

    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));

  auto *CShift = dyn_cast<Constant>(Amt);

  bool AnyOutOfRange = false;
  for (int I = 0; I < NumElts; ++I) {
    auto *CElt = CShift->getAggregateElement(I);
    if (isa_and_nonnull<UndefValue>(CElt)) {
    auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
    APInt ShiftVal = COp->getValue();
      AnyOutOfRange = LogicalShift;

  for (int Idx : ShiftAmts) {
      assert(LogicalShift && "Logical shift expected");
      ConstantVec.push_back(ConstantInt::getNullValue(SVT));

  for (int Idx : ShiftAmts) {

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
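// simplifyX86pack: PACKSS/PACKUS saturate each source element to the narrower
// destination type and interleave the two operands 128-bit lane by lane; with
// constant operands this is modelled as clamp-to-range followed by a
// shufflevector.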
  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))

  auto *ArgTy = cast<FixedVectorType>(Arg0->getType());
  unsigned NumSrcElts = ArgTy->getNumElements();
  assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
         "Unexpected packing types");

  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
  unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
  assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
         "Unexpected packing types");

  if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))

  APInt MinValue, MaxValue;

  Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
  Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
  Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);

  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
    for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
      PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);

  auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);
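// simplifyX86movmsk: movmskps/movmskpd/pmovmskb collect the sign bit of every
// element into a scalar; model it as an "is negative" test per element, a
// bitcast of the resulting bool vector to an integer, and a zero-extension to
// the return type.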
  if (isa<UndefValue>(Arg))

  auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());

  unsigned NumElts = ArgTy->getNumElements();

  Value *Res = Builder.CreateBitCast(Arg, VectorType::getInteger(ArgTy));
  Res = Builder.CreateIsNeg(Res);
  Res = Builder.CreateBitCast(Res, IntegerTy);
  Res = Builder.CreateZExtOrTrunc(Res, ResTy);
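// simplifyX86addcarry: when the incoming carry bit is known to be zero, the
// addcarry intrinsic is just an unsigned add; build llvm.uadd.with.overflow
// and repack its sum and overflow bit into the {i8, iN} result struct.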
  assert(RetTy->getStructElementType(0)->isIntegerTy(8) &&
         RetTy->getStructElementType(1) == OpTy && OpTy == Op2->getType() &&
         "Unexpected types for x86 addcarry");

    Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
    Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);

    Res = Builder.CreateInsertValue(Res, UAddOV, 0);
    return Builder.CreateInsertValue(Res, UAddResult, 1);
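// simplifyX86insertps: a constant immediate fully determines the source lane,
// destination lane, and zero mask, so the intrinsic can be rewritten as a
// shufflevector (indices 4-7 select elements from a zero vector).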
  auto *VecTy = cast<FixedVectorType>(II.getType());
  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

    uint8_t Imm = CInt->getZExtValue();
    uint8_t ZMask = Imm & 0xf;
    uint8_t DestLane = (Imm >> 4) & 0x3;
    uint8_t SourceLane = (Imm >> 6) & 0x3;

    int ShuffleMask[4] = {0, 1, 2, 3};

        (ZMask & (1 << DestLane))) {
      ShuffleMask[DestLane] = SourceLane;
      for (unsigned i = 0; i < 4; ++i)
        if ((ZMask >> i) & 0x1)
          ShuffleMask[i] = i + 4;

      ShuffleMask[DestLane] = SourceLane + 4;
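// simplifyX86extrq: attempt to simplify SSE4A EXTRQ/EXTRQI using constant
// folding or conversion to a shuffle.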
  auto LowConstantHighUndef = [&](uint64_t Val) {

  auto *C0 = dyn_cast<Constant>(Op0);
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))

  if (CILength && CIIndex) {

      for (int i = 0; i != (int)Length; ++i)
      for (int i = Length; i != 8; ++i)
      for (int i = 8; i != 16; ++i)

          Builder.CreateBitCast(Op0, ShufTy),

    APInt Elt = CI0->getValue();

    Value *Args[] = {Op0, CILength, CIIndex};

  if (CI0 && CI0->isZero())
    return LowConstantHighUndef(0);
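// simplifyX86insertq: attempt to simplify SSE4A INSERTQ/INSERTQI using
// constant folding or conversion to a shuffle.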
      for (int i = 0; i != (int)Index; ++i)
      for (int i = 0; i != (int)Length; ++i)
      for (int i = 8; i != 16; ++i)

          Builder.CreateBitCast(Op1, ShufTy),

  auto *C0 = dyn_cast<Constant>(Op0);
  auto *C1 = dyn_cast<Constant>(Op1);
      C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))

    APInt V00 = CI00->getValue();
    APInt V10 = CI10->getValue();

    APInt Val = V00 | V10;

    Value *Args[] = {Op0, Op1, CILength, CIIndex};
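// simplifyX86pshufb: convert pshufb with a constant mask to a shufflevector.
// A mask byte with its sign bit set selects zero; otherwise its low bits
// index an element within the 16-byte lane.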
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
         "Unexpected number of elements in shuffle mask!");

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {

    int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();

  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
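// simplifyX86vpermilvar: convert vpermilvar* with a constant mask to a
// shufflevector; each index only selects within its own 128-bit lane, so it
// is rebased onto the lane it came from.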
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  bool IsPD = VecTy->getScalarType()->isDoubleTy();
  unsigned NumLaneElts = IsPD ? 2 : 4;
  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {

    APInt Index = cast<ConstantInt>(COp)->getValue();

    Index += APInt(32, (I / NumLaneElts) * NumLaneElts);

    Indexes[I] = Index.getZExtValue();
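// simplifyX86vpermv: convert vpermd/vpermps to a shufflevector when the
// selector vector is constant.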
  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned Size = VecTy->getNumElements();
         "Unexpected shuffle mask size");

  for (unsigned I = 0; I < Size; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
    if (isa<UndefValue>(COp)) {
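// instCombineIntrinsic: InstCombine entry point for X86 intrinsics. The
// SimplifyDemandedVectorEltsLow helper below asks SimplifyDemandedVectorElts
// to simplify an operand when only its lowest DemandedWidth elements are
// demanded.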
std::optional<Instruction *>

  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
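  // BMI/TBM bit manipulation intrinsics (bextr/bzhi/pext/pdep) fold when their
  // operands are constant; pext/pdep with an all-zero, all-ones, or single
  // contiguous (shifted) mask reduce to simple bit arithmetic.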
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue() >> Shift;
        Result &= maskTrailingOnes<uint64_t>(Length);

  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue();
        Result &= maskTrailingOnes<uint64_t>(Index);

  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();

          if (BitToTest & Src)

  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue()) {
      if (MaskC->isAllOnesValue()) {

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();

          if (BitToTest & Src)
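  // Scalar conversions (cvtss2si, cvttsd2si, and the AVX-512 variants) only
  // read element 0 of their vector operand, so the other elements need not be
  // demanded.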
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    bool MadeChange = false;
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    if (R->getValue() == 4) {
      case Intrinsic::x86_avx512_add_ps_512:
      case Intrinsic::x86_avx512_add_pd_512:
      case Intrinsic::x86_avx512_sub_ps_512:
      case Intrinsic::x86_avx512_sub_pd_512:
      case Intrinsic::x86_avx512_mul_ps_512:
      case Intrinsic::x86_avx512_mul_pd_512:
      case Intrinsic::x86_avx512_div_ps_512:
      case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    if (R->getValue() == 4) {
      case Intrinsic::x86_avx512_mask_add_ss_round:
      case Intrinsic::x86_avx512_mask_add_sd_round:
      case Intrinsic::x86_avx512_mask_sub_ss_round:
      case Intrinsic::x86_avx512_mask_sub_sd_round:
      case Intrinsic::x86_avx512_mask_mul_ss_round:
      case Intrinsic::x86_avx512_mask_mul_sd_round:
      case Intrinsic::x86_avx512_mask_div_ss_round:
      case Intrinsic::x86_avx512_mask_div_sd_round:
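      // A rounding operand of 4 selects the current rounding mode, so the
      // operation is an ordinary scalar FP op; bit 0 of the mask then chooses
      // between the computed result and the passthrough operand.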
      auto *C = dyn_cast<ConstantInt>(Mask);
      if (!C || !C->getValue()[0]) {
            cast<IntegerType>(Mask->getType())->getBitWidth());
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:

  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
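    // The pclmulqdq immediate selects which 64-bit half of each operand is
    // multiplied, so only one element of each argument is actually demanded.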
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
  case Intrinsic::x86_sse41_insertps:

  case Intrinsic::x86_sse4a_extrq: {
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 16 && "Unexpected operand sizes");

    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
  case Intrinsic::x86_sse4a_extrqi: {
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           "Unexpected operand size");

    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));

    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
  case Intrinsic::x86_sse4a_insertq: {
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
           cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
           "Unexpected operand size");

    auto *C1 = dyn_cast<Constant>(Op1);
        C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

      const APInt &V11 = CI11->getValue();

    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
  case Intrinsic::x86_sse4a_insertqi: {
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
           VWidth1 == 2 && "Unexpected operand sizes");

    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);

    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    if (isa<ConstantAggregateZero>(Mask)) {
    if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {

    assert(Mask->getType()->getPrimitiveSizeInBits() ==
               II.getType()->getPrimitiveSizeInBits() &&
           "Not expecting mask and operands with different sizes");

    unsigned NumMaskElts =
        cast<FixedVectorType>(Mask->getType())->getNumElements();
    unsigned NumOperandElts =
        cast<FixedVectorType>(II.getType())->getNumElements();
    if (NumMaskElts == NumOperandElts) {
    if (NumMaskElts < NumOperandElts) {
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:

  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:

  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:

  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:

  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:

  return std::nullopt;
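// simplifyDemandedUseBitsIntrinsic: the movmsk family produces one bit per
// source vector element, so only the low ArgWidth bits of the result can ever
// be set; knowledge of the remaining bits is computed here directly.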
                                          bool &KnownBitsComputed) const {
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx2_pmovmskb: {
      ArgWidth = ArgType->getNumElements();

    if (DemandedElts.isZero()) {

    KnownBitsComputed = true;

  return std::nullopt;
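// simplifyDemandedVectorEltsIntrinsic: for the scalar (ss/sd) intrinsics only
// element 0 of the inputs participates; the upper elements of the result pass
// through from operand 0, so demanded-element analysis can be narrowed
// accordingly.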
                                              simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    if (!DemandedElts[0]) {

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    UndefElts = UndefElts[0];

  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    if (!DemandedElts[0]) {
  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    if (!DemandedElts[0]) {

    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    APInt DemandedElts2 = DemandedElts;
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);

    if (!DemandedElts[0]) {

    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    UndefElts |= UndefElts2[0];
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    if (!DemandedElts[0]) {

    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);

    if (!UndefElts2[0] || !UndefElts3[0])

  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {
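    // addsub subtracts in the even elements and adds in the odd elements; if
    // only one of the two groups is demanded, the intrinsic collapses to a
    // single fsub or fadd.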
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");

          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;

  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
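    // Each 128-bit output lane packs the matching lane of operand 0 followed
    // by the matching lane of operand 1: translate demanded output elements
    // into per-operand demanded elements, then map the reported undefs back to
    // output positions.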
    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);

      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);

  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:

  return std::nullopt;