117 bool LogicalShift =
false;
118 bool ShiftLeft =
false;
121 switch (
II.getIntrinsicID()) {
124 case Intrinsic::x86_sse2_psrai_d:
125 case Intrinsic::x86_sse2_psrai_w:
126 case Intrinsic::x86_avx2_psrai_d:
127 case Intrinsic::x86_avx2_psrai_w:
128 case Intrinsic::x86_avx512_psrai_q_128:
129 case Intrinsic::x86_avx512_psrai_q_256:
130 case Intrinsic::x86_avx512_psrai_d_512:
131 case Intrinsic::x86_avx512_psrai_q_512:
132 case Intrinsic::x86_avx512_psrai_w_512:
135 case Intrinsic::x86_sse2_psra_d:
136 case Intrinsic::x86_sse2_psra_w:
137 case Intrinsic::x86_avx2_psra_d:
138 case Intrinsic::x86_avx2_psra_w:
139 case Intrinsic::x86_avx512_psra_q_128:
140 case Intrinsic::x86_avx512_psra_q_256:
141 case Intrinsic::x86_avx512_psra_d_512:
142 case Intrinsic::x86_avx512_psra_q_512:
143 case Intrinsic::x86_avx512_psra_w_512:
144 LogicalShift =
false;
147 case Intrinsic::x86_sse2_psrli_d:
148 case Intrinsic::x86_sse2_psrli_q:
149 case Intrinsic::x86_sse2_psrli_w:
150 case Intrinsic::x86_avx2_psrli_d:
151 case Intrinsic::x86_avx2_psrli_q:
152 case Intrinsic::x86_avx2_psrli_w:
153 case Intrinsic::x86_avx512_psrli_d_512:
154 case Intrinsic::x86_avx512_psrli_q_512:
155 case Intrinsic::x86_avx512_psrli_w_512:
158 case Intrinsic::x86_sse2_psrl_d:
159 case Intrinsic::x86_sse2_psrl_q:
160 case Intrinsic::x86_sse2_psrl_w:
161 case Intrinsic::x86_avx2_psrl_d:
162 case Intrinsic::x86_avx2_psrl_q:
163 case Intrinsic::x86_avx2_psrl_w:
164 case Intrinsic::x86_avx512_psrl_d_512:
165 case Intrinsic::x86_avx512_psrl_q_512:
166 case Intrinsic::x86_avx512_psrl_w_512:
170 case Intrinsic::x86_sse2_pslli_d:
171 case Intrinsic::x86_sse2_pslli_q:
172 case Intrinsic::x86_sse2_pslli_w:
173 case Intrinsic::x86_avx2_pslli_d:
174 case Intrinsic::x86_avx2_pslli_q:
175 case Intrinsic::x86_avx2_pslli_w:
176 case Intrinsic::x86_avx512_pslli_d_512:
177 case Intrinsic::x86_avx512_pslli_q_512:
178 case Intrinsic::x86_avx512_pslli_w_512:
181 case Intrinsic::x86_sse2_psll_d:
182 case Intrinsic::x86_sse2_psll_q:
183 case Intrinsic::x86_sse2_psll_w:
184 case Intrinsic::x86_avx2_psll_d:
185 case Intrinsic::x86_avx2_psll_q:
186 case Intrinsic::x86_avx2_psll_w:
187 case Intrinsic::x86_avx512_psll_d_512:
188 case Intrinsic::x86_avx512_psll_q_512:
189 case Intrinsic::x86_avx512_psll_w_512:
194 assert((LogicalShift || !ShiftLeft) &&
"Only logical shifts can shift left");
196 Value *Vec =
II.getArgOperand(0);
197 Value *Amt =
II.getArgOperand(1);
199 Type *SVT = VT->getElementType();
201 unsigned VWidth = VT->getNumElements();
212 Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
213 Amt = Builder.CreateVectorSplat(VWidth, Amt);
214 return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
215 : Builder.CreateLShr(Vec, Amt))
216 : Builder.CreateAShr(Vec, Amt));
221 Amt = ConstantInt::get(SVT,
BitWidth - 1);
222 return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
229 "Unexpected shift-by-scalar type");
234 Amt, DemandedLower,
II.getDataLayout());
236 Amt, DemandedUpper,
II.getDataLayout());
240 Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
241 return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
242 : Builder.CreateLShr(Vec, Amt))
243 : Builder.CreateAShr(Vec, Amt));
256 "Unexpected shift-by-scalar type");
260 for (
unsigned i = 0, NumSubElts = 64 /
BitWidth; i != NumSubElts; ++i) {
261 unsigned SubEltIdx = (NumSubElts - 1) - i;
264 Count |= SubElt->getValue().zextOrTrunc(64);
282 auto ShiftAmt = ConstantInt::get(SVT,
Count.zextOrTrunc(
BitWidth));
283 auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
286 return Builder.CreateShl(Vec, ShiftVec);
289 return Builder.CreateLShr(Vec, ShiftVec);
291 return Builder.CreateAShr(Vec, ShiftVec);
2203 auto SimplifyDemandedVectorEltsLow = [&IC](
Value *
Op,
unsigned Width,
2204 unsigned DemandedWidth) {
2205 APInt UndefElts(Width, 0);
2212 case Intrinsic::x86_bmi_bextr_32:
2213 case Intrinsic::x86_bmi_bextr_64:
2214 case Intrinsic::x86_tbm_bextri_u32:
2215 case Intrinsic::x86_tbm_bextri_u64:
2221 unsigned BitWidth =
II.getType()->getIntegerBitWidth();
2228 uint64_t Result = InC->getZExtValue() >> Shift;
2233 ConstantInt::get(
II.getType(), Result));
2240 case Intrinsic::x86_bmi_bzhi_32:
2241 case Intrinsic::x86_bmi_bzhi_64:
2244 uint64_t Index =
C->getZExtValue() & 0xff;
2245 unsigned BitWidth =
II.getType()->getIntegerBitWidth();
2254 uint64_t Result = InC->getZExtValue();
2257 ConstantInt::get(
II.getType(), Result));
2262 case Intrinsic::x86_bmi_pext_32:
2263 case Intrinsic::x86_bmi_pext_64:
2265 if (MaskC->isNullValue()) {
2268 if (MaskC->isAllOnesValue()) {
2272 unsigned MaskIdx, MaskLen;
2273 if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
2279 Value *ShiftAmt = ConstantInt::get(
II.getType(), MaskIdx);
2285 uint64_t Src = SrcC->getZExtValue();
2286 uint64_t Mask = MaskC->getZExtValue();
2293 if (BitToTest & Src)
2302 ConstantInt::get(
II.getType(), Result));
2306 case Intrinsic::x86_bmi_pdep_32:
2307 case Intrinsic::x86_bmi_pdep_64:
2309 if (MaskC->isNullValue()) {
2312 if (MaskC->isAllOnesValue()) {
2316 unsigned MaskIdx, MaskLen;
2317 if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
2322 Value *ShiftAmt = ConstantInt::get(
II.getType(), MaskIdx);
2329 uint64_t Src = SrcC->getZExtValue();
2330 uint64_t Mask = MaskC->getZExtValue();
2337 if (BitToTest & Src)
2346 ConstantInt::get(
II.getType(), Result));
2351 case Intrinsic::x86_sse_cvtss2si:
2352 case Intrinsic::x86_sse_cvtss2si64:
2353 case Intrinsic::x86_sse_cvttss2si:
2354 case Intrinsic::x86_sse_cvttss2si64:
2355 case Intrinsic::x86_sse2_cvtsd2si:
2356 case Intrinsic::x86_sse2_cvtsd2si64:
2357 case Intrinsic::x86_sse2_cvttsd2si:
2358 case Intrinsic::x86_sse2_cvttsd2si64:
2359 case Intrinsic::x86_avx512_vcvtss2si32:
2360 case Intrinsic::x86_avx512_vcvtss2si64:
2361 case Intrinsic::x86_avx512_vcvtss2usi32:
2362 case Intrinsic::x86_avx512_vcvtss2usi64:
2363 case Intrinsic::x86_avx512_vcvtsd2si32:
2364 case Intrinsic::x86_avx512_vcvtsd2si64:
2365 case Intrinsic::x86_avx512_vcvtsd2usi32:
2366 case Intrinsic::x86_avx512_vcvtsd2usi64:
2367 case Intrinsic::x86_avx512_cvttss2si:
2368 case Intrinsic::x86_avx512_cvttss2si64:
2369 case Intrinsic::x86_avx512_cvttss2usi:
2370 case Intrinsic::x86_avx512_cvttss2usi64:
2371 case Intrinsic::x86_avx512_cvttsd2si:
2372 case Intrinsic::x86_avx512_cvttsd2si64:
2373 case Intrinsic::x86_avx512_cvttsd2usi:
2374 case Intrinsic::x86_avx512_cvttsd2usi64: {
2377 Value *Arg =
II.getArgOperand(0);
2379 if (
Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2385 case Intrinsic::x86_mmx_pmovmskb:
2386 case Intrinsic::x86_sse_movmsk_ps:
2387 case Intrinsic::x86_sse2_movmsk_pd:
2388 case Intrinsic::x86_sse2_pmovmskb_128:
2389 case Intrinsic::x86_avx_movmsk_pd_256:
2390 case Intrinsic::x86_avx_movmsk_ps_256:
2391 case Intrinsic::x86_avx2_pmovmskb:
2397 case Intrinsic::x86_sse_comieq_ss:
2398 case Intrinsic::x86_sse_comige_ss:
2399 case Intrinsic::x86_sse_comigt_ss:
2400 case Intrinsic::x86_sse_comile_ss:
2401 case Intrinsic::x86_sse_comilt_ss:
2402 case Intrinsic::x86_sse_comineq_ss:
2403 case Intrinsic::x86_sse_ucomieq_ss:
2404 case Intrinsic::x86_sse_ucomige_ss:
2405 case Intrinsic::x86_sse_ucomigt_ss:
2406 case Intrinsic::x86_sse_ucomile_ss:
2407 case Intrinsic::x86_sse_ucomilt_ss:
2408 case Intrinsic::x86_sse_ucomineq_ss:
2409 case Intrinsic::x86_sse2_comieq_sd:
2410 case Intrinsic::x86_sse2_comige_sd:
2411 case Intrinsic::x86_sse2_comigt_sd:
2412 case Intrinsic::x86_sse2_comile_sd:
2413 case Intrinsic::x86_sse2_comilt_sd:
2414 case Intrinsic::x86_sse2_comineq_sd:
2415 case Intrinsic::x86_sse2_ucomieq_sd:
2416 case Intrinsic::x86_sse2_ucomige_sd:
2417 case Intrinsic::x86_sse2_ucomigt_sd:
2418 case Intrinsic::x86_sse2_ucomile_sd:
2419 case Intrinsic::x86_sse2_ucomilt_sd:
2420 case Intrinsic::x86_sse2_ucomineq_sd:
2421 case Intrinsic::x86_avx512_vcomi_ss:
2422 case Intrinsic::x86_avx512_vcomi_sd:
2423 case Intrinsic::x86_avx512_mask_cmp_ss:
2424 case Intrinsic::x86_avx512_mask_cmp_sd: {
2427 bool MadeChange =
false;
2428 Value *Arg0 =
II.getArgOperand(0);
2429 Value *Arg1 =
II.getArgOperand(1);
2431 if (
Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2435 if (
Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2445 case Intrinsic::x86_avx512_add_ps_512:
2446 case Intrinsic::x86_avx512_div_ps_512:
2447 case Intrinsic::x86_avx512_mul_ps_512:
2448 case Intrinsic::x86_avx512_sub_ps_512:
2449 case Intrinsic::x86_avx512_add_pd_512:
2450 case Intrinsic::x86_avx512_div_pd_512:
2451 case Intrinsic::x86_avx512_mul_pd_512:
2452 case Intrinsic::x86_avx512_sub_pd_512:
2456 if (R->getValue() == 4) {
2457 Value *Arg0 =
II.getArgOperand(0);
2458 Value *Arg1 =
II.getArgOperand(1);
2464 case Intrinsic::x86_avx512_add_ps_512:
2465 case Intrinsic::x86_avx512_add_pd_512:
2468 case Intrinsic::x86_avx512_sub_ps_512:
2469 case Intrinsic::x86_avx512_sub_pd_512:
2472 case Intrinsic::x86_avx512_mul_ps_512:
2473 case Intrinsic::x86_avx512_mul_pd_512:
2476 case Intrinsic::x86_avx512_div_ps_512:
2477 case Intrinsic::x86_avx512_div_pd_512:
2487 case Intrinsic::x86_avx512_mask_add_ss_round:
2488 case Intrinsic::x86_avx512_mask_div_ss_round:
2489 case Intrinsic::x86_avx512_mask_mul_ss_round:
2490 case Intrinsic::x86_avx512_mask_sub_ss_round:
2491 case Intrinsic::x86_avx512_mask_add_sd_round:
2492 case Intrinsic::x86_avx512_mask_div_sd_round:
2493 case Intrinsic::x86_avx512_mask_mul_sd_round:
2494 case Intrinsic::x86_avx512_mask_sub_sd_round:
2498 if (R->getValue() == 4) {
2500 Value *Arg0 =
II.getArgOperand(0);
2501 Value *Arg1 =
II.getArgOperand(1);
2509 case Intrinsic::x86_avx512_mask_add_ss_round:
2510 case Intrinsic::x86_avx512_mask_add_sd_round:
2513 case Intrinsic::x86_avx512_mask_sub_ss_round:
2514 case Intrinsic::x86_avx512_mask_sub_sd_round:
2517 case Intrinsic::x86_avx512_mask_mul_ss_round:
2518 case Intrinsic::x86_avx512_mask_mul_sd_round:
2521 case Intrinsic::x86_avx512_mask_div_ss_round:
2522 case Intrinsic::x86_avx512_mask_div_sd_round:
2528 Value *Mask =
II.getArgOperand(3);
2531 if (!
C || !
C->getValue()[0]) {
2553 case Intrinsic::x86_sse_max_ps:
2554 case Intrinsic::x86_sse2_max_pd:
2555 case Intrinsic::x86_avx_max_pd_256:
2556 case Intrinsic::x86_avx_max_ps_256:
2557 case Intrinsic::x86_avx512_max_pd_512:
2558 case Intrinsic::x86_avx512_max_ps_512:
2559 case Intrinsic::x86_avx512fp16_max_ph_128:
2560 case Intrinsic::x86_avx512fp16_max_ph_256:
2561 case Intrinsic::x86_avx512fp16_max_ph_512:
2565 case Intrinsic::x86_sse_max_ss:
2566 case Intrinsic::x86_sse2_max_sd: {
2572 case Intrinsic::x86_sse_min_ps:
2573 case Intrinsic::x86_sse2_min_pd:
2574 case Intrinsic::x86_avx_min_pd_256:
2575 case Intrinsic::x86_avx_min_ps_256:
2576 case Intrinsic::x86_avx512_min_pd_512:
2577 case Intrinsic::x86_avx512_min_ps_512:
2578 case Intrinsic::x86_avx512fp16_min_ph_128:
2579 case Intrinsic::x86_avx512fp16_min_ph_256:
2580 case Intrinsic::x86_avx512fp16_min_ph_512:
2585 case Intrinsic::x86_sse_min_ss:
2586 case Intrinsic::x86_sse2_min_sd: {
2595 case Intrinsic::x86_sse2_psrai_d:
2596 case Intrinsic::x86_sse2_psrai_w:
2597 case Intrinsic::x86_avx2_psrai_d:
2598 case Intrinsic::x86_avx2_psrai_w:
2599 case Intrinsic::x86_avx512_psrai_q_128:
2600 case Intrinsic::x86_avx512_psrai_q_256:
2601 case Intrinsic::x86_avx512_psrai_d_512:
2602 case Intrinsic::x86_avx512_psrai_q_512:
2603 case Intrinsic::x86_avx512_psrai_w_512:
2604 case Intrinsic::x86_sse2_psrli_d:
2605 case Intrinsic::x86_sse2_psrli_q:
2606 case Intrinsic::x86_sse2_psrli_w:
2607 case Intrinsic::x86_avx2_psrli_d:
2608 case Intrinsic::x86_avx2_psrli_q:
2609 case Intrinsic::x86_avx2_psrli_w:
2610 case Intrinsic::x86_avx512_psrli_d_512:
2611 case Intrinsic::x86_avx512_psrli_q_512:
2612 case Intrinsic::x86_avx512_psrli_w_512:
2613 case Intrinsic::x86_sse2_pslli_d:
2614 case Intrinsic::x86_sse2_pslli_q:
2615 case Intrinsic::x86_sse2_pslli_w:
2616 case Intrinsic::x86_avx2_pslli_d:
2617 case Intrinsic::x86_avx2_pslli_q:
2618 case Intrinsic::x86_avx2_pslli_w:
2619 case Intrinsic::x86_avx512_pslli_d_512:
2620 case Intrinsic::x86_avx512_pslli_q_512:
2621 case Intrinsic::x86_avx512_pslli_w_512:
2627 case Intrinsic::x86_sse2_psra_d:
2628 case Intrinsic::x86_sse2_psra_w:
2629 case Intrinsic::x86_avx2_psra_d:
2630 case Intrinsic::x86_avx2_psra_w:
2631 case Intrinsic::x86_avx512_psra_q_128:
2632 case Intrinsic::x86_avx512_psra_q_256:
2633 case Intrinsic::x86_avx512_psra_d_512:
2634 case Intrinsic::x86_avx512_psra_q_512:
2635 case Intrinsic::x86_avx512_psra_w_512:
2636 case Intrinsic::x86_sse2_psrl_d:
2637 case Intrinsic::x86_sse2_psrl_q:
2638 case Intrinsic::x86_sse2_psrl_w:
2639 case Intrinsic::x86_avx2_psrl_d:
2640 case Intrinsic::x86_avx2_psrl_q:
2641 case Intrinsic::x86_avx2_psrl_w:
2642 case Intrinsic::x86_avx512_psrl_d_512:
2643 case Intrinsic::x86_avx512_psrl_q_512:
2644 case Intrinsic::x86_avx512_psrl_w_512:
2645 case Intrinsic::x86_sse2_psll_d:
2646 case Intrinsic::x86_sse2_psll_q:
2647 case Intrinsic::x86_sse2_psll_w:
2648 case Intrinsic::x86_avx2_psll_d:
2649 case Intrinsic::x86_avx2_psll_q:
2650 case Intrinsic::x86_avx2_psll_w:
2651 case Intrinsic::x86_avx512_psll_d_512:
2652 case Intrinsic::x86_avx512_psll_q_512:
2653 case Intrinsic::x86_avx512_psll_w_512: {
2660 Value *Arg1 =
II.getArgOperand(1);
2662 "Unexpected packed shift size");
2665 if (
Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2671 case Intrinsic::x86_avx2_psllv_d:
2672 case Intrinsic::x86_avx2_psllv_d_256:
2673 case Intrinsic::x86_avx2_psllv_q:
2674 case Intrinsic::x86_avx2_psllv_q_256:
2675 case Intrinsic::x86_avx512_psllv_d_512:
2676 case Intrinsic::x86_avx512_psllv_q_512:
2677 case Intrinsic::x86_avx512_psllv_w_128:
2678 case Intrinsic::x86_avx512_psllv_w_256:
2679 case Intrinsic::x86_avx512_psllv_w_512:
2680 case Intrinsic::x86_avx2_psrav_d:
2681 case Intrinsic::x86_avx2_psrav_d_256:
2682 case Intrinsic::x86_avx512_psrav_q_128:
2683 case Intrinsic::x86_avx512_psrav_q_256:
2684 case Intrinsic::x86_avx512_psrav_d_512:
2685 case Intrinsic::x86_avx512_psrav_q_512:
2686 case Intrinsic::x86_avx512_psrav_w_128:
2687 case Intrinsic::x86_avx512_psrav_w_256:
2688 case Intrinsic::x86_avx512_psrav_w_512:
2689 case Intrinsic::x86_avx2_psrlv_d:
2690 case Intrinsic::x86_avx2_psrlv_d_256:
2691 case Intrinsic::x86_avx2_psrlv_q:
2692 case Intrinsic::x86_avx2_psrlv_q_256:
2693 case Intrinsic::x86_avx512_psrlv_d_512:
2694 case Intrinsic::x86_avx512_psrlv_q_512:
2695 case Intrinsic::x86_avx512_psrlv_w_128:
2696 case Intrinsic::x86_avx512_psrlv_w_256:
2697 case Intrinsic::x86_avx512_psrlv_w_512:
2703 case Intrinsic::x86_sse2_packssdw_128:
2704 case Intrinsic::x86_sse2_packsswb_128:
2705 case Intrinsic::x86_avx2_packssdw:
2706 case Intrinsic::x86_avx2_packsswb:
2707 case Intrinsic::x86_avx512_packssdw_512:
2708 case Intrinsic::x86_avx512_packsswb_512:
2714 case Intrinsic::x86_sse2_packuswb_128:
2715 case Intrinsic::x86_sse41_packusdw:
2716 case Intrinsic::x86_avx2_packusdw:
2717 case Intrinsic::x86_avx2_packuswb:
2718 case Intrinsic::x86_avx512_packusdw_512:
2719 case Intrinsic::x86_avx512_packuswb_512:
2725 case Intrinsic::x86_sse2_pmulh_w:
2726 case Intrinsic::x86_avx2_pmulh_w:
2727 case Intrinsic::x86_avx512_pmulh_w_512:
2733 case Intrinsic::x86_sse2_pmulhu_w:
2734 case Intrinsic::x86_avx2_pmulhu_w:
2735 case Intrinsic::x86_avx512_pmulhu_w_512:
2741 case Intrinsic::x86_ssse3_pmul_hr_sw_128:
2742 case Intrinsic::x86_avx2_pmul_hr_sw:
2743 case Intrinsic::x86_avx512_pmul_hr_sw_512:
2749 case Intrinsic::x86_sse2_pmadd_wd:
2750 case Intrinsic::x86_avx2_pmadd_wd:
2751 case Intrinsic::x86_avx512_pmaddw_d_512:
2757 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
2758 case Intrinsic::x86_avx2_pmadd_ub_sw:
2759 case Intrinsic::x86_avx512_pmaddubs_w_512:
2765 case Intrinsic::x86_pclmulqdq:
2766 case Intrinsic::x86_pclmulqdq_256:
2767 case Intrinsic::x86_pclmulqdq_512: {
2769 unsigned Imm =
C->getZExtValue();
2771 bool MadeChange =
false;
2772 Value *Arg0 =
II.getArgOperand(0);
2773 Value *Arg1 =
II.getArgOperand(1);
2777 APInt UndefElts1(VWidth, 0);
2778 APInt DemandedElts1 =
2786 APInt UndefElts2(VWidth, 0);
2787 APInt DemandedElts2 =
2809 case Intrinsic::x86_sse41_insertps:
2815 case Intrinsic::x86_sse4a_extrq: {
2816 Value *Op0 =
II.getArgOperand(0);
2817 Value *Op1 =
II.getArgOperand(1);
2822 VWidth1 == 16 &&
"Unexpected operand sizes");
2840 bool MadeChange =
false;
2841 if (
Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2845 if (
Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2855 case Intrinsic::x86_sse4a_extrqi: {
2858 Value *Op0 =
II.getArgOperand(0);
2861 "Unexpected operand size");
2874 if (
Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2880 case Intrinsic::x86_sse4a_insertq: {
2881 Value *Op0 =
II.getArgOperand(0);
2882 Value *Op1 =
II.getArgOperand(1);
2887 "Unexpected operand size");
2897 const APInt &V11 = CI11->getValue();
2907 if (
Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2913 case Intrinsic::x86_sse4a_insertqi: {
2917 Value *Op0 =
II.getArgOperand(0);
2918 Value *Op1 =
II.getArgOperand(1);
2923 VWidth1 == 2 &&
"Unexpected operand sizes");
2930 if (CILength && CIIndex) {
2931 APInt Len = CILength->getValue().zextOrTrunc(6);
2940 bool MadeChange =
false;
2941 if (
Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2945 if (
Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2955 case Intrinsic::x86_sse41_pblendvb:
2956 case Intrinsic::x86_sse41_blendvps:
2957 case Intrinsic::x86_sse41_blendvpd:
2958 case Intrinsic::x86_avx_blendv_ps_256:
2959 case Intrinsic::x86_avx_blendv_pd_256:
2960 case Intrinsic::x86_avx2_pblendvb: {
2963 Value *Op0 =
II.getArgOperand(0);
2964 Value *Op1 =
II.getArgOperand(1);
2965 Value *Mask =
II.getArgOperand(2);
2981 unsigned BitWidth = Mask->getType()->getScalarSizeInBits();
2983 if (Mask->getType()->isIntOrIntVectorTy()) {
2988 if (BC->hasOneUse()) {
2989 Value *Src = BC->getOperand(0);
2990 if (Src->getType()->isIntOrIntVectorTy()) {
2991 unsigned SrcBitWidth = Src->getType()->getScalarSizeInBits();
3007 if (MaskTy->getScalarSizeInBits() == OpTy->getScalarSizeInBits()) {
3019 Value *MaskSrc =
nullptr;
3022 m_Mask(ShuffleMask))))) {
3025 if (NumElts < (
int)ShuffleMask.size() || !
isPowerOf2_32(NumElts) ||
3027 [NumElts](
int M) {
return M < 0 || M >= NumElts; }))
3039 unsigned NumMaskElts = MaskTy->getNumElements();
3040 unsigned NumOperandElts = OpTy->getNumElements();
3044 unsigned NumMaskSrcElts =
3046 NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
3048 if (NumMaskElts > NumOperandElts)
3056 assert(MaskTy->getPrimitiveSizeInBits() ==
3057 OpTy->getPrimitiveSizeInBits() &&
3058 "Not expecting mask and operands with different sizes");
3060 if (NumMaskElts == NumOperandElts) {
3066 if (NumMaskElts < NumOperandElts) {
3077 case Intrinsic::x86_ssse3_pshuf_b_128:
3078 case Intrinsic::x86_avx2_pshuf_b:
3079 case Intrinsic::x86_avx512_pshuf_b_512: {
3090 case Intrinsic::x86_avx_vpermilvar_ps:
3091 case Intrinsic::x86_avx_vpermilvar_ps_256:
3092 case Intrinsic::x86_avx512_vpermilvar_ps_512: {
3103 case Intrinsic::x86_avx_vpermilvar_pd:
3104 case Intrinsic::x86_avx_vpermilvar_pd_256:
3105 case Intrinsic::x86_avx512_vpermilvar_pd_512: {
3116 case Intrinsic::x86_avx2_permd:
3117 case Intrinsic::x86_avx2_permps:
3118 case Intrinsic::x86_avx512_permvar_df_256:
3119 case Intrinsic::x86_avx512_permvar_df_512:
3120 case Intrinsic::x86_avx512_permvar_di_256:
3121 case Intrinsic::x86_avx512_permvar_di_512:
3122 case Intrinsic::x86_avx512_permvar_hi_128:
3123 case Intrinsic::x86_avx512_permvar_hi_256:
3124 case Intrinsic::x86_avx512_permvar_hi_512:
3125 case Intrinsic::x86_avx512_permvar_qi_128:
3126 case Intrinsic::x86_avx512_permvar_qi_256:
3127 case Intrinsic::x86_avx512_permvar_qi_512:
3128 case Intrinsic::x86_avx512_permvar_sf_512:
3129 case Intrinsic::x86_avx512_permvar_si_512:
3137 case Intrinsic::x86_avx512_vpermi2var_d_128:
3138 case Intrinsic::x86_avx512_vpermi2var_d_256:
3139 case Intrinsic::x86_avx512_vpermi2var_d_512:
3140 case Intrinsic::x86_avx512_vpermi2var_hi_128:
3141 case Intrinsic::x86_avx512_vpermi2var_hi_256:
3142 case Intrinsic::x86_avx512_vpermi2var_hi_512:
3143 case Intrinsic::x86_avx512_vpermi2var_pd_128:
3144 case Intrinsic::x86_avx512_vpermi2var_pd_256:
3145 case Intrinsic::x86_avx512_vpermi2var_pd_512:
3146 case Intrinsic::x86_avx512_vpermi2var_ps_128:
3147 case Intrinsic::x86_avx512_vpermi2var_ps_256:
3148 case Intrinsic::x86_avx512_vpermi2var_ps_512:
3149 case Intrinsic::x86_avx512_vpermi2var_q_128:
3150 case Intrinsic::x86_avx512_vpermi2var_q_256:
3151 case Intrinsic::x86_avx512_vpermi2var_q_512:
3152 case Intrinsic::x86_avx512_vpermi2var_qi_128:
3153 case Intrinsic::x86_avx512_vpermi2var_qi_256:
3154 case Intrinsic::x86_avx512_vpermi2var_qi_512:
3162 case Intrinsic::x86_avx_maskload_ps:
3163 case Intrinsic::x86_avx_maskload_pd:
3164 case Intrinsic::x86_avx_maskload_ps_256:
3165 case Intrinsic::x86_avx_maskload_pd_256:
3166 case Intrinsic::x86_avx2_maskload_d:
3167 case Intrinsic::x86_avx2_maskload_q:
3168 case Intrinsic::x86_avx2_maskload_d_256:
3169 case Intrinsic::x86_avx2_maskload_q_256:
3175 case Intrinsic::x86_sse2_maskmov_dqu:
3176 case Intrinsic::x86_avx_maskstore_ps:
3177 case Intrinsic::x86_avx_maskstore_pd:
3178 case Intrinsic::x86_avx_maskstore_ps_256:
3179 case Intrinsic::x86_avx_maskstore_pd_256:
3180 case Intrinsic::x86_avx2_maskstore_d:
3181 case Intrinsic::x86_avx2_maskstore_q:
3182 case Intrinsic::x86_avx2_maskstore_d_256:
3183 case Intrinsic::x86_avx2_maskstore_q_256:
3189 case Intrinsic::x86_addcarry_32:
3190 case Intrinsic::x86_addcarry_64:
3196 case Intrinsic::x86_avx512_pternlog_d_128:
3197 case Intrinsic::x86_avx512_pternlog_d_256:
3198 case Intrinsic::x86_avx512_pternlog_d_512:
3199 case Intrinsic::x86_avx512_pternlog_q_128:
3200 case Intrinsic::x86_avx512_pternlog_q_256:
3201 case Intrinsic::x86_avx512_pternlog_q_512:
3210 return std::nullopt;
3257 simplifyAndSetOp)
const {
3259 switch (
II.getIntrinsicID()) {
3262 case Intrinsic::x86_xop_vfrcz_ss:
3263 case Intrinsic::x86_xop_vfrcz_sd:
3268 if (!DemandedElts[0]) {
3275 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3278 UndefElts = UndefElts[0];
3282 case Intrinsic::x86_sse_rcp_ss:
3283 case Intrinsic::x86_sse_rsqrt_ss:
3284 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3287 if (!DemandedElts[0]) {
3289 return II.getArgOperand(0);
3298 case Intrinsic::x86_sse_min_ss:
3299 case Intrinsic::x86_sse_max_ss:
3300 case Intrinsic::x86_sse_cmp_ss:
3301 case Intrinsic::x86_sse2_min_sd:
3302 case Intrinsic::x86_sse2_max_sd:
3303 case Intrinsic::x86_sse2_cmp_sd: {
3304 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3307 if (!DemandedElts[0]) {
3309 return II.getArgOperand(0);
3314 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3326 case Intrinsic::x86_sse41_round_ss:
3327 case Intrinsic::x86_sse41_round_sd: {
3329 APInt DemandedElts2 = DemandedElts;
3331 simplifyAndSetOp(&
II, 0, DemandedElts2, UndefElts);
3334 if (!DemandedElts[0]) {
3336 return II.getArgOperand(0);
3341 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3346 UndefElts |= UndefElts2[0];
3353 case Intrinsic::x86_avx512_mask_add_ss_round:
3354 case Intrinsic::x86_avx512_mask_div_ss_round:
3355 case Intrinsic::x86_avx512_mask_mul_ss_round:
3356 case Intrinsic::x86_avx512_mask_sub_ss_round:
3357 case Intrinsic::x86_avx512_mask_max_ss_round:
3358 case Intrinsic::x86_avx512_mask_min_ss_round:
3359 case Intrinsic::x86_avx512_mask_add_sd_round:
3360 case Intrinsic::x86_avx512_mask_div_sd_round:
3361 case Intrinsic::x86_avx512_mask_mul_sd_round:
3362 case Intrinsic::x86_avx512_mask_sub_sd_round:
3363 case Intrinsic::x86_avx512_mask_max_sd_round:
3364 case Intrinsic::x86_avx512_mask_min_sd_round:
3365 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3368 if (!DemandedElts[0]) {
3370 return II.getArgOperand(0);
3375 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3376 simplifyAndSetOp(&
II, 2, DemandedElts, UndefElts3);
3380 if (!UndefElts2[0] || !UndefElts3[0])
3385 case Intrinsic::x86_sse3_addsub_pd:
3386 case Intrinsic::x86_sse3_addsub_ps:
3387 case Intrinsic::x86_avx_addsub_pd_256:
3388 case Intrinsic::x86_avx_addsub_ps_256: {
3393 bool IsSubOnly = DemandedElts.
isSubsetOf(SubMask);
3394 bool IsAddOnly = DemandedElts.
isSubsetOf(AddMask);
3395 if (IsSubOnly || IsAddOnly) {
3396 assert((IsSubOnly ^ IsAddOnly) &&
"Can't be both add-only and sub-only");
3399 Value *Arg0 =
II.getArgOperand(0), *Arg1 =
II.getArgOperand(1);
3401 IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
3404 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3405 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3406 UndefElts &= UndefElts2;
3411 case Intrinsic::x86_avx2_psllv_d:
3412 case Intrinsic::x86_avx2_psllv_d_256:
3413 case Intrinsic::x86_avx2_psllv_q:
3414 case Intrinsic::x86_avx2_psllv_q_256:
3415 case Intrinsic::x86_avx2_psrlv_d:
3416 case Intrinsic::x86_avx2_psrlv_d_256:
3417 case Intrinsic::x86_avx2_psrlv_q:
3418 case Intrinsic::x86_avx2_psrlv_q_256:
3419 case Intrinsic::x86_avx2_psrav_d:
3420 case Intrinsic::x86_avx2_psrav_d_256: {
3421 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3422 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3423 UndefElts &= UndefElts2;
3427 case Intrinsic::x86_sse2_pmulh_w:
3428 case Intrinsic::x86_avx2_pmulh_w:
3429 case Intrinsic::x86_avx512_pmulh_w_512:
3430 case Intrinsic::x86_sse2_pmulhu_w:
3431 case Intrinsic::x86_avx2_pmulhu_w:
3432 case Intrinsic::x86_avx512_pmulhu_w_512:
3433 case Intrinsic::x86_ssse3_pmul_hr_sw_128:
3434 case Intrinsic::x86_avx2_pmul_hr_sw:
3435 case Intrinsic::x86_avx512_pmul_hr_sw_512: {
3436 simplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
3437 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts2);
3442 case Intrinsic::x86_sse2_packssdw_128:
3443 case Intrinsic::x86_sse2_packsswb_128:
3444 case Intrinsic::x86_sse2_packuswb_128:
3445 case Intrinsic::x86_sse41_packusdw:
3446 case Intrinsic::x86_avx2_packssdw:
3447 case Intrinsic::x86_avx2_packsswb:
3448 case Intrinsic::x86_avx2_packusdw:
3449 case Intrinsic::x86_avx2_packuswb:
3450 case Intrinsic::x86_avx512_packssdw_512:
3451 case Intrinsic::x86_avx512_packsswb_512:
3452 case Intrinsic::x86_avx512_packusdw_512:
3453 case Intrinsic::x86_avx512_packuswb_512: {
3454 auto *Ty0 =
II.getArgOperand(0)->getType();
3456 assert(VWidth == (InnerVWidth * 2) &&
"Unexpected input size");
3458 unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
3459 unsigned VWidthPerLane = VWidth / NumLanes;
3460 unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
3466 for (
int OpNum = 0; OpNum != 2; ++OpNum) {
3467 APInt OpDemandedElts(InnerVWidth, 0);
3468 for (
unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3469 unsigned LaneIdx = Lane * VWidthPerLane;
3470 for (
unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
3471 unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
3472 if (DemandedElts[Idx])
3473 OpDemandedElts.
setBit((Lane * InnerVWidthPerLane) + Elt);
3478 APInt OpUndefElts(InnerVWidth, 0);
3479 simplifyAndSetOp(&
II, OpNum, OpDemandedElts, OpUndefElts);
3482 OpUndefElts = OpUndefElts.
zext(VWidth);
3483 for (
unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3484 APInt LaneElts = OpUndefElts.
lshr(InnerVWidthPerLane * Lane);
3485 LaneElts = LaneElts.
getLoBits(InnerVWidthPerLane);
3486 LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
3487 UndefElts |= LaneElts;
3493 case Intrinsic::x86_sse2_pmadd_wd:
3494 case Intrinsic::x86_avx2_pmadd_wd:
3495 case Intrinsic::x86_avx512_pmaddw_d_512:
3496 case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3497 case Intrinsic::x86_avx2_pmadd_ub_sw:
3498 case Intrinsic::x86_avx512_pmaddubs_w_512: {
3500 auto *ArgTy =
II.getArgOperand(0)->getType();
3502 assert((VWidth * 2) == InnerVWidth &&
"Unexpected input size");
3504 APInt Op0UndefElts(InnerVWidth, 0);
3505 APInt Op1UndefElts(InnerVWidth, 0);
3506 simplifyAndSetOp(&
II, 0, OpDemandedElts, Op0UndefElts);
3507 simplifyAndSetOp(&
II, 1, OpDemandedElts, Op1UndefElts);
3513 case Intrinsic::x86_ssse3_pshuf_b_128:
3514 case Intrinsic::x86_avx2_pshuf_b:
3515 case Intrinsic::x86_avx512_pshuf_b_512:
3517 case Intrinsic::x86_avx_vpermilvar_ps:
3518 case Intrinsic::x86_avx_vpermilvar_ps_256:
3519 case Intrinsic::x86_avx512_vpermilvar_ps_512:
3520 case Intrinsic::x86_avx_vpermilvar_pd:
3521 case Intrinsic::x86_avx_vpermilvar_pd_256:
3522 case Intrinsic::x86_avx512_vpermilvar_pd_512:
3524 case Intrinsic::x86_avx2_permd:
3525 case Intrinsic::x86_avx2_permps: {
3526 simplifyAndSetOp(&
II, 1, DemandedElts, UndefElts);
3532 case Intrinsic::x86_sse4a_extrq:
3533 case Intrinsic::x86_sse4a_extrqi:
3534 case Intrinsic::x86_sse4a_insertq:
3535 case Intrinsic::x86_sse4a_insertqi:
3539 return std::nullopt;