#include "llvm/IR/IntrinsicsX86.h"

#define DEBUG_TYPE "x86tti"
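// The fragments below come from the masked load/store combines: a constant
// mask can be inspected directly, an all-zero mask makes the access dead, and
// the pointer's address space is preserved when the access is rebuilt.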
if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask))

if (isa<ConstantAggregateZero>(Mask))

unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();

if (isa<ConstantAggregateZero>(Mask)) {

unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
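// Shift-by-immediate / shift-by-scalar handling: classify each intrinsic as
// logical or arithmetic and left or right, then try to fold the shift amount
// into a generic IR shift.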
bool LogicalShift = false;
bool ShiftLeft = false;

case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psrai_q_128:
case Intrinsic::x86_avx512_psrai_q_256:
case Intrinsic::x86_avx512_psrai_d_512:
case Intrinsic::x86_avx512_psrai_q_512:
case Intrinsic::x86_avx512_psrai_w_512:
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx512_psra_q_128:
case Intrinsic::x86_avx512_psra_q_256:
case Intrinsic::x86_avx512_psra_d_512:
case Intrinsic::x86_avx512_psra_q_512:
case Intrinsic::x86_avx512_psra_w_512:
  LogicalShift = false;

case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx512_psrli_d_512:
case Intrinsic::x86_avx512_psrli_q_512:
case Intrinsic::x86_avx512_psrli_w_512:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx512_psrl_d_512:
case Intrinsic::x86_avx512_psrl_q_512:
case Intrinsic::x86_avx512_psrl_w_512:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512:

assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

auto *VT = cast<FixedVectorType>(Vec->getType());
Type *SVT = VT->getElementType();
unsigned VWidth = VT->getNumElements();

Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
Amt = Builder.CreateVectorSplat(VWidth, Amt);
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                  : Builder.CreateLShr(Vec, Amt))
                     : Builder.CreateAShr(Vec, Amt));

return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));

       cast<VectorType>(AmtVT)->getElementType() == SVT &&
       "Unexpected shift-by-scalar type");
unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();

Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                  : Builder.CreateLShr(Vec, Amt))
                     : Builder.CreateAShr(Vec, Amt));
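// Constant shift-amount vector: the hardware only reads the low 64 bits of
// the count operand, so the 64/BitWidth sub-elements are folded into a single
// splatted shift count.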
auto *CDV = dyn_cast<ConstantDataVector>(Amt);
       cast<VectorType>(AmtVT)->getElementType() == SVT &&
       "Unexpected shift-by-scalar type");

for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
  unsigned SubEltIdx = (NumSubElts - 1) - i;
  auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));

auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
return Builder.CreateShl(Vec, ShiftVec);
return Builder.CreateLShr(Vec, ShiftVec);
return Builder.CreateAShr(Vec, ShiftVec);
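// Variable (per-element) shift intrinsics: the same classification applies,
// but every lane carries its own shift count.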
bool LogicalShift = false;
bool ShiftLeft = false;

case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
case Intrinsic::x86_avx512_psrav_q_128:
case Intrinsic::x86_avx512_psrav_q_256:
case Intrinsic::x86_avx512_psrav_d_512:
case Intrinsic::x86_avx512_psrav_q_512:
case Intrinsic::x86_avx512_psrav_w_128:
case Intrinsic::x86_avx512_psrav_w_256:
case Intrinsic::x86_avx512_psrav_w_512:
  LogicalShift = false;

case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx512_psrlv_d_512:
case Intrinsic::x86_avx512_psrlv_q_512:
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:
case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx512_psllv_d_512:
case Intrinsic::x86_avx512_psllv_q_512:
case Intrinsic::x86_avx512_psllv_w_128:
case Intrinsic::x86_avx512_psllv_w_256:
case Intrinsic::x86_avx512_psllv_w_512:

assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");

auto *VT = cast<FixedVectorType>(II.getType());
Type *SVT = VT->getElementType();
int NumElts = VT->getNumElements();

return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                  : Builder.CreateLShr(Vec, Amt))
                     : Builder.CreateAShr(Vec, Amt));
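// Fully constant per-element counts: collect each lane's shift amount and
// note out-of-range lanes; only the logical-shift forms need the extra
// handling, since their out-of-range lanes fold to zero.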
auto *CShift = dyn_cast<Constant>(Amt);

bool AnyOutOfRange = false;

for (int I = 0; I < NumElts; ++I) {
  auto *CElt = CShift->getAggregateElement(I);
  if (isa_and_nonnull<UndefValue>(CElt)) {
    ShiftAmts.push_back(-1);

  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
  APInt ShiftVal = COp->getValue();
  AnyOutOfRange = LogicalShift;

auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
for (int Idx : ShiftAmts) {
assert(LogicalShift && "Logical shift expected");

for (int Idx : ShiftAmts) {
return Builder.CreateShl(Vec, ShiftVec);
return Builder.CreateLShr(Vec, ShiftVec);
return Builder.CreateAShr(Vec, ShiftVec);
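// PACKSS/PACKUS handling: constant inputs are clamped to the destination
// range with icmp/select pairs, interleaved per 128-bit lane by a
// shufflevector, then truncated to the narrow result type.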
if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))

auto *ArgTy = cast<FixedVectorType>(Arg0->getType());

unsigned NumSrcElts = ArgTy->getNumElements();
assert(cast<FixedVectorType>(ResTy)->getNumElements() == (2 * NumSrcElts) &&
       "Unexpected packing types");

unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;

unsigned SrcScalarSizeInBits = ArgTy->getScalarSizeInBits();
assert(SrcScalarSizeInBits == (2 * DstScalarSizeInBits) &&
       "Unexpected packing types");

if (!isa<Constant>(Arg0) || !isa<Constant>(Arg1))

APInt MinValue, MaxValue;

Arg0 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg0, MinC), MinC, Arg0);
Arg1 = Builder.CreateSelect(Builder.CreateICmpSLT(Arg1, MinC), MinC, Arg1);
Arg0 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg0, MaxC), MaxC, Arg0);
Arg1 = Builder.CreateSelect(Builder.CreateICmpSGT(Arg1, MaxC), MaxC, Arg1);

for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
  for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
    PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane));
  for (unsigned Elt = 0; Elt != NumSrcEltsPerLane; ++Elt)
    PackMask.push_back(Elt + (Lane * NumSrcEltsPerLane) + NumSrcElts);

auto *Shuffle = Builder.CreateShuffleVector(Arg0, Arg1, PackMask);

return Builder.CreateTrunc(Shuffle, ResTy);
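// MOVMSK: extracting the sign bits becomes an is-negative compare, a bitcast
// of the <N x i1> result to an integer, and a zext/trunc to the return type.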
if (isa<UndefValue>(Arg))

auto *ArgTy = dyn_cast<FixedVectorType>(Arg->getType());

unsigned NumElts = ArgTy->getNumElements();

Res = Builder.CreateIsNeg(Res);
Res = Builder.CreateBitCast(Res, IntegerTy);
Res = Builder.CreateZExtOrTrunc(Res, ResTy);
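// ADDCARRY with a known-zero carry-in reduces to llvm.uadd.with.overflow;
// the {carry, result} pair is rebuilt with insertvalue.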
       "Unexpected types for x86 addcarry");

Value *UAdd = Builder.CreateIntrinsic(Intrinsic::uadd_with_overflow, OpTy,
Value *UAddResult = Builder.CreateExtractValue(UAdd, 0);

Res = Builder.CreateInsertValue(Res, UAddOV, 0);
return Builder.CreateInsertValue(Res, UAddResult, 1);
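// INSERTPS: decode the immediate into a zero mask plus source and destination
// lanes, then express the whole operation as a shufflevector.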
auto *VecTy = cast<FixedVectorType>(II.getType());
assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");

uint8_t Imm = CInt->getZExtValue();
uint8_t ZMask = Imm & 0xf;
uint8_t DestLane = (Imm >> 4) & 0x3;
uint8_t SourceLane = (Imm >> 6) & 0x3;

int ShuffleMask[4] = {0, 1, 2, 3};

    (ZMask & (1 << DestLane))) {

ShuffleMask[DestLane] = SourceLane;

for (unsigned i = 0; i < 4; ++i)
  if ((ZMask >> i) & 0x1)
    ShuffleMask[i] = i + 4;

ShuffleMask[DestLane] = SourceLane + 4;
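// SSE4A EXTRQ/EXTRQI and INSERTQ/INSERTQI: with constant length/index
// operands the bitfield extract/insert on the low 64 bits can be folded, and
// byte-aligned cases become a plain byte shuffle.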
auto LowConstantHighUndef = [&](uint64_t Val) {

auto *C0 = dyn_cast<Constant>(Op0);
    C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))

if (CILength && CIIndex) {

unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
unsigned End = Index + Length;

if ((Length % 8) == 0 && (Index % 8) == 0) {

for (int i = 0; i != (int)Length; ++i)
  ShuffleMask.push_back(i + Index);
for (int i = Length; i != 8; ++i)
  ShuffleMask.push_back(i + 16);
for (int i = 8; i != 16; ++i)
  ShuffleMask.push_back(-1);

    Builder.CreateBitCast(Op0, ShufTy),

APInt Elt = CI0->getValue();

Value *Args[] = {Op0, CILength, CIIndex};

if (CI0 && CI0->isZero())
  return LowConstantHighUndef(0);

unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
unsigned End = Index + Length;

if ((Length % 8) == 0 && (Index % 8) == 0) {

for (int i = 0; i != (int)Index; ++i)
  ShuffleMask.push_back(i);
for (int i = 0; i != (int)Length; ++i)
  ShuffleMask.push_back(i + 16);
for (int i = Index + Length; i != 8; ++i)
  ShuffleMask.push_back(i);
for (int i = 8; i != 16; ++i)
  ShuffleMask.push_back(-1);

    Builder.CreateBitCast(Op1, ShufTy),

auto *C0 = dyn_cast<Constant>(Op0);
auto *C1 = dyn_cast<Constant>(Op1);
    C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
    C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))

APInt V00 = CI00->getValue();
APInt V10 = CI10->getValue();
APInt Val = V00 | V10;

Value *Args[] = {Op0, Op1, CILength, CIIndex};
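// PSHUFB / VPERMILVAR / VPERMV: a constant control vector folds to a
// shufflevector; how each mask element is decoded is intrinsic-specific.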
auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
       "Unexpected number of elements in shuffle mask!");

for (unsigned I = 0; I < NumElts; ++I) {
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
  if (isa<UndefValue>(COp)) {
  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
bool IsPD = VecTy->getScalarType()->isDoubleTy();
unsigned NumLaneElts = IsPD ? 2 : 4;
assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);

for (unsigned I = 0; I < NumElts; ++I) {
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
  if (isa<UndefValue>(COp)) {
  APInt Index = cast<ConstantInt>(COp)->getValue();
  Index = Index.zextOrTrunc(32).getLoBits(2);
  Index.lshrInPlace(1);
  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
  Indexes[I] = Index.getZExtValue();

auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned Size = VecTy->getNumElements();
assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
       "Unexpected shuffle mask size");

for (unsigned I = 0; I < Size; ++I) {
  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
  if (isa<UndefValue>(COp)) {
  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
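// Helper for the scalar (lowest-element) intrinsics below: demand only the
// low DemandedWidth elements of Op and let the generic demanded-elements
// logic simplify the rest.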
auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                           unsigned DemandedWidth) {
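// BMI/TBM bit manipulation (BEXTR, BZHI, PEXT, PDEP): fold to a constant when
// the operands are constant, and to simpler IR when the mask is all-zero,
// all-ones, or a shifted mask.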
case Intrinsic::x86_bmi_bextr_32:
case Intrinsic::x86_bmi_bextr_64:
case Intrinsic::x86_tbm_bextri_u32:
case Intrinsic::x86_tbm_bextri_u64:

  if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
    Result &= maskTrailingOnes<uint64_t>(Length);

case Intrinsic::x86_bmi_bzhi_32:
case Intrinsic::x86_bmi_bzhi_64:

  uint64_t Index = C->getZExtValue() & 0xff;

  if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
    uint64_t Result = InC->getZExtValue();
    Result &= maskTrailingOnes<uint64_t>(Index);

case Intrinsic::x86_bmi_pext_32:
case Intrinsic::x86_bmi_pext_64:
  if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
    if (MaskC->isNullValue()) {
    if (MaskC->isAllOnesValue()) {

    unsigned MaskIdx, MaskLen;
    if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {

    if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      uint64_t Src = SrcC->getZExtValue();
      if (BitToTest & Src)

case Intrinsic::x86_bmi_pdep_32:
case Intrinsic::x86_bmi_pdep_64:
  if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
    if (MaskC->isNullValue()) {
    if (MaskC->isAllOnesValue()) {

    unsigned MaskIdx, MaskLen;
    if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {

    if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
      uint64_t Src = SrcC->getZExtValue();
      if (BitToTest & Src)

case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
case Intrinsic::x86_avx512_vcvtss2si32:
case Intrinsic::x86_avx512_vcvtss2si64:
case Intrinsic::x86_avx512_vcvtss2usi32:
case Intrinsic::x86_avx512_vcvtss2usi64:
case Intrinsic::x86_avx512_vcvtsd2si32:
case Intrinsic::x86_avx512_vcvtsd2si64:
case Intrinsic::x86_avx512_vcvtsd2usi32:
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_cvttss2si:
case Intrinsic::x86_avx512_cvttss2si64:
case Intrinsic::x86_avx512_cvttss2usi:
case Intrinsic::x86_avx512_cvttss2usi64:
case Intrinsic::x86_avx512_cvttsd2si:
case Intrinsic::x86_avx512_cvttsd2si64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64: {
  unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {

case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_sse2_pmovmskb_128:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_avx2_pmovmskb:

case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse_comige_ss:
case Intrinsic::x86_sse_comigt_ss:
case Intrinsic::x86_sse_comile_ss:
case Intrinsic::x86_sse_comilt_ss:
case Intrinsic::x86_sse_comineq_ss:
case Intrinsic::x86_sse_ucomieq_ss:
case Intrinsic::x86_sse_ucomige_ss:
case Intrinsic::x86_sse_ucomigt_ss:
case Intrinsic::x86_sse_ucomile_ss:
case Intrinsic::x86_sse_ucomilt_ss:
case Intrinsic::x86_sse_ucomineq_ss:
case Intrinsic::x86_sse2_comieq_sd:
case Intrinsic::x86_sse2_comige_sd:
case Intrinsic::x86_sse2_comigt_sd:
case Intrinsic::x86_sse2_comile_sd:
case Intrinsic::x86_sse2_comilt_sd:
case Intrinsic::x86_sse2_comineq_sd:
case Intrinsic::x86_sse2_ucomieq_sd:
case Intrinsic::x86_sse2_ucomige_sd:
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomile_sd:
case Intrinsic::x86_sse2_ucomilt_sd:
case Intrinsic::x86_sse2_ucomineq_sd:
case Intrinsic::x86_avx512_vcomi_ss:
case Intrinsic::x86_avx512_vcomi_sd:
case Intrinsic::x86_avx512_mask_cmp_ss:
case Intrinsic::x86_avx512_mask_cmp_sd: {
  bool MadeChange = false;
  unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
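// AVX-512 packed FP arithmetic with a rounding-mode operand: a value of 4
// (CUR_DIRECTION) means default rounding, so the call can become a plain
// fadd/fsub/fmul/fdiv.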
case Intrinsic::x86_avx512_add_ps_512:
case Intrinsic::x86_avx512_div_ps_512:
case Intrinsic::x86_avx512_mul_ps_512:
case Intrinsic::x86_avx512_sub_ps_512:
case Intrinsic::x86_avx512_add_pd_512:
case Intrinsic::x86_avx512_div_pd_512:
case Intrinsic::x86_avx512_mul_pd_512:
case Intrinsic::x86_avx512_sub_pd_512:
  if (R->getValue() == 4) {
    case Intrinsic::x86_avx512_add_ps_512:
    case Intrinsic::x86_avx512_add_pd_512:
    case Intrinsic::x86_avx512_sub_ps_512:
    case Intrinsic::x86_avx512_sub_pd_512:
    case Intrinsic::x86_avx512_mul_ps_512:
    case Intrinsic::x86_avx512_mul_pd_512:
    case Intrinsic::x86_avx512_div_ps_512:
    case Intrinsic::x86_avx512_div_pd_512:

case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_mul_ss_round:
case Intrinsic::x86_avx512_mask_sub_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
case Intrinsic::x86_avx512_mask_div_sd_round:
case Intrinsic::x86_avx512_mask_mul_sd_round:
case Intrinsic::x86_avx512_mask_sub_sd_round:
  if (R->getValue() == 4) {
    case Intrinsic::x86_avx512_mask_add_ss_round:
    case Intrinsic::x86_avx512_mask_add_sd_round:
    case Intrinsic::x86_avx512_mask_sub_ss_round:
    case Intrinsic::x86_avx512_mask_sub_sd_round:
    case Intrinsic::x86_avx512_mask_mul_ss_round:
    case Intrinsic::x86_avx512_mask_mul_sd_round:
    case Intrinsic::x86_avx512_mask_div_ss_round:
    case Intrinsic::x86_avx512_mask_div_sd_round:

    auto *C = dyn_cast<ConstantInt>(Mask);
    if (!C || !C->getValue()[0]) {

case Intrinsic::x86_sse2_psrai_d:
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
case Intrinsic::x86_avx512_psrai_q_128:
case Intrinsic::x86_avx512_psrai_q_256:
case Intrinsic::x86_avx512_psrai_d_512:
case Intrinsic::x86_avx512_psrai_q_512:
case Intrinsic::x86_avx512_psrai_w_512:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
case Intrinsic::x86_avx512_psrli_d_512:
case Intrinsic::x86_avx512_psrli_q_512:
case Intrinsic::x86_avx512_psrli_w_512:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
case Intrinsic::x86_avx512_pslli_d_512:
case Intrinsic::x86_avx512_pslli_q_512:
case Intrinsic::x86_avx512_pslli_w_512:

case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx512_psra_q_128:
case Intrinsic::x86_avx512_psra_q_256:
case Intrinsic::x86_avx512_psra_d_512:
case Intrinsic::x86_avx512_psra_q_512:
case Intrinsic::x86_avx512_psra_w_512:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
case Intrinsic::x86_avx512_psrl_d_512:
case Intrinsic::x86_avx512_psrl_q_512:
case Intrinsic::x86_avx512_psrl_w_512:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
case Intrinsic::x86_avx2_psll_w:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
case Intrinsic::x86_avx512_psll_w_512: {
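// Shift-by-scalar: only the low 64 bits of the count vector are used, so just
// the low half of its elements need to be demanded.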
         "Unexpected packed shift size");
  unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();
  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {

case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx512_psllv_d_512:
case Intrinsic::x86_avx512_psllv_q_512:
case Intrinsic::x86_avx512_psllv_w_128:
case Intrinsic::x86_avx512_psllv_w_256:
case Intrinsic::x86_avx512_psllv_w_512:
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
case Intrinsic::x86_avx512_psrav_q_128:
case Intrinsic::x86_avx512_psrav_q_256:
case Intrinsic::x86_avx512_psrav_d_512:
case Intrinsic::x86_avx512_psrav_q_512:
case Intrinsic::x86_avx512_psrav_w_128:
case Intrinsic::x86_avx512_psrav_w_256:
case Intrinsic::x86_avx512_psrav_w_512:
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx512_psrlv_d_512:
case Intrinsic::x86_avx512_psrlv_q_512:
case Intrinsic::x86_avx512_psrlv_w_128:
case Intrinsic::x86_avx512_psrlv_w_256:
case Intrinsic::x86_avx512_psrlv_w_512:

case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_avx2_packssdw:
case Intrinsic::x86_avx2_packsswb:
case Intrinsic::x86_avx512_packssdw_512:
case Intrinsic::x86_avx512_packsswb_512:

case Intrinsic::x86_sse2_packuswb_128:
case Intrinsic::x86_sse41_packusdw:
case Intrinsic::x86_avx2_packusdw:
case Intrinsic::x86_avx2_packuswb:
case Intrinsic::x86_avx512_packusdw_512:
case Intrinsic::x86_avx512_packuswb_512:

case Intrinsic::x86_pclmulqdq:
case Intrinsic::x86_pclmulqdq_256:
case Intrinsic::x86_pclmulqdq_512: {
  unsigned Imm = C->getZExtValue();

  bool MadeChange = false;
      cast<FixedVectorType>(Arg0->getType())->getNumElements();

  APInt UndefElts1(VWidth, 0);
  APInt DemandedElts1 =

  APInt UndefElts2(VWidth, 0);
  APInt DemandedElts2 =

case Intrinsic::x86_sse41_insertps:

case Intrinsic::x86_sse4a_extrq: {
  unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
  unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
         VWidth1 == 16 && "Unexpected operand sizes");

  auto *C1 = dyn_cast<Constant>(Op1);
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

  bool MadeChange = false;
  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {

case Intrinsic::x86_sse4a_extrqi: {
  unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
         "Unexpected operand size");

  auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(1));

  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {

case Intrinsic::x86_sse4a_insertq: {
  unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
         cast<FixedVectorType>(Op1->getType())->getNumElements() == 2 &&
         "Unexpected operand size");

  auto *C1 = dyn_cast<Constant>(Op1);
      C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))

  const APInt &V11 = CI11->getValue();

  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {

case Intrinsic::x86_sse4a_insertqi: {
  unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
  unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
         VWidth1 == 2 && "Unexpected operand sizes");

  auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
  if (CILength && CIIndex) {

  bool MadeChange = false;
  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {

case Intrinsic::x86_sse41_pblendvb:
case Intrinsic::x86_sse41_blendvps:
case Intrinsic::x86_sse41_blendvpd:
case Intrinsic::x86_avx_blendv_ps_256:
case Intrinsic::x86_avx_blendv_pd_256:
case Intrinsic::x86_avx2_pblendvb: {
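// BLENDV: an all-zero mask selects the first operand outright, and a constant
// mask turns into a select on the mask's sign bits (bitcast first when the
// mask and operand element counts differ).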
  if (isa<ConstantAggregateZero>(Mask)) {

  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {

  assert(Mask->getType()->getPrimitiveSizeInBits() ==
         "Not expecting mask and operands with different sizes");

  unsigned NumMaskElts =
      cast<FixedVectorType>(Mask->getType())->getNumElements();
  unsigned NumOperandElts =
      cast<FixedVectorType>(II.getType())->getNumElements();
  if (NumMaskElts == NumOperandElts) {

  if (NumMaskElts < NumOperandElts) {

case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
case Intrinsic::x86_avx512_pshuf_b_512:

case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:

case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
case Intrinsic::x86_avx512_permvar_df_256:
case Intrinsic::x86_avx512_permvar_df_512:
case Intrinsic::x86_avx512_permvar_di_256:
case Intrinsic::x86_avx512_permvar_di_512:
case Intrinsic::x86_avx512_permvar_hi_128:
case Intrinsic::x86_avx512_permvar_hi_256:
case Intrinsic::x86_avx512_permvar_hi_512:
case Intrinsic::x86_avx512_permvar_qi_128:
case Intrinsic::x86_avx512_permvar_qi_256:
case Intrinsic::x86_avx512_permvar_qi_512:
case Intrinsic::x86_avx512_permvar_sf_512:
case Intrinsic::x86_avx512_permvar_si_512:

case Intrinsic::x86_avx_maskload_ps:
case Intrinsic::x86_avx_maskload_pd:
case Intrinsic::x86_avx_maskload_ps_256:
case Intrinsic::x86_avx_maskload_pd_256:
case Intrinsic::x86_avx2_maskload_d:
case Intrinsic::x86_avx2_maskload_q:
case Intrinsic::x86_avx2_maskload_d_256:
case Intrinsic::x86_avx2_maskload_q_256:

case Intrinsic::x86_sse2_maskmov_dqu:
case Intrinsic::x86_avx_maskstore_ps:
case Intrinsic::x86_avx_maskstore_pd:
case Intrinsic::x86_avx_maskstore_ps_256:
case Intrinsic::x86_avx_maskstore_pd_256:
case Intrinsic::x86_avx2_maskstore_d:
case Intrinsic::x86_avx2_maskstore_q:
case Intrinsic::x86_avx2_maskstore_d_256:
case Intrinsic::x86_avx2_maskstore_q_256:

case Intrinsic::x86_addcarry_32:
case Intrinsic::x86_addcarry_64:

    bool &KnownBitsComputed) const {
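// MOVMSK sets one result bit per vector element; bits above ArgWidth are
// known zero, and if no element bit is demanded the result folds to zero.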
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_sse2_pmovmskb_128:
case Intrinsic::x86_avx_movmsk_ps_256:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_avx2_pmovmskb: {
  ArgWidth = ArgType->getNumElements();

  if (DemandedElts.isZero()) {

  KnownBitsComputed = true;

    simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();

case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
  if (!DemandedElts[0]) {
  simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
  UndefElts = UndefElts[0];

case Intrinsic::x86_sse_rcp_ss:
case Intrinsic::x86_sse_rsqrt_ss:
  simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
  if (!DemandedElts[0]) {

case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
case Intrinsic::x86_sse2_cmp_sd: {
  simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
  if (!DemandedElts[0]) {
  simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

case Intrinsic::x86_sse41_round_ss:
case Intrinsic::x86_sse41_round_sd: {
  APInt DemandedElts2 = DemandedElts;
  simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);
  if (!DemandedElts[0]) {
  simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
  UndefElts |= UndefElts2[0];

case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_mul_ss_round:
case Intrinsic::x86_avx512_mask_sub_ss_round:
case Intrinsic::x86_avx512_mask_max_ss_round:
case Intrinsic::x86_avx512_mask_min_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
case Intrinsic::x86_avx512_mask_div_sd_round:
case Intrinsic::x86_avx512_mask_mul_sd_round:
case Intrinsic::x86_avx512_mask_sub_sd_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
  simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
  if (!DemandedElts[0]) {
  simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
  simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);
  if (!UndefElts2[0] || !UndefElts3[0])

case Intrinsic::x86_sse3_addsub_pd:
case Intrinsic::x86_sse3_addsub_ps:
case Intrinsic::x86_avx_addsub_pd_256:
case Intrinsic::x86_avx_addsub_ps_256: {
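// ADDSUB subtracts in the even elements and adds in the odd ones; if only one
// parity is demanded, the whole intrinsic is a plain FSub or FAdd.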
  bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
  bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
  if (IsSubOnly || IsAddOnly) {
    assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");

        IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);

  simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
  simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
  UndefElts &= UndefElts2;

case Intrinsic::x86_avx2_psllv_d:
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256: {
  simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
  simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
  UndefElts &= UndefElts2;

case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_sse2_packuswb_128:
case Intrinsic::x86_sse41_packusdw:
case Intrinsic::x86_avx2_packssdw:
case Intrinsic::x86_avx2_packsswb:
case Intrinsic::x86_avx2_packusdw:
case Intrinsic::x86_avx2_packuswb:
case Intrinsic::x86_avx512_packssdw_512:
case Intrinsic::x86_avx512_packsswb_512:
case Intrinsic::x86_avx512_packusdw_512:
case Intrinsic::x86_avx512_packuswb_512: {
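// The PACK intrinsics interleave their two sources per 128-bit lane, so
// demanded result elements are mapped back to the matching lane and operand
// before recursing, and the operands' undef elements are mapped forward again.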
  unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
  assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

  unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
  unsigned VWidthPerLane = VWidth / NumLanes;
  unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

  for (int OpNum = 0; OpNum != 2; ++OpNum) {
    APInt OpDemandedElts(InnerVWidth, 0);
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      unsigned LaneIdx = Lane * VWidthPerLane;
      for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
        unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
        if (DemandedElts[Idx])
          OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);

    APInt OpUndefElts(InnerVWidth, 0);
    simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

    OpUndefElts = OpUndefElts.zext(VWidth);
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
      LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
      LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
      UndefElts |= LaneElts;

case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
case Intrinsic::x86_avx512_pshuf_b_512:
case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
case Intrinsic::x86_avx512_vpermilvar_pd_512:
case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps: {
  simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);

case Intrinsic::x86_sse4a_extrq:
case Intrinsic::x86_sse4a_extrqi:
case Intrinsic::x86_sse4a_insertq:
case Intrinsic::x86_sse4a_insertqi: