  bool LogicalShift = false;
  bool ShiftLeft = false;
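  // Classify the intrinsic: logical vs. arithmetic shift, and left vs. right.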
  switch (II.getIntrinsicID()) {
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
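  // Shift-by-immediate: if the amount is known to be in range for the element
  // type, the intrinsic can be replaced with a generic IR shift.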
  Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
  Amt = Builder.CreateVectorSplat(VWidth, Amt);
  return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                    : Builder.CreateLShr(Vec, Amt))
                       : Builder.CreateAShr(Vec, Amt));
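  // An out-of-range amount folds logical shifts to zero; for arithmetic
  // shifts it is clamped to (BitWidth - 1).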
  Amt = ConstantInt::get(SVT, BitWidth - 1);
  return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
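  // Shift-by-scalar (PSRA/PSRL/PSLL): the amount comes from the bottom 64
  // bits of a 128-bit vector operand.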
229 "Unexpected shift-by-scalar type");
  unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
  APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
  APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
  KnownBits KnownLowerBits = llvm::computeKnownBits(
      Amt, DemandedLower, II.getDataLayout());
  KnownBits KnownUpperBits = llvm::computeKnownBits(
      Amt, DemandedUpper, II.getDataLayout());
  if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
      (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
    SmallVector<int, 16> ZeroSplat(VWidth, 0);
    Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }
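  // Otherwise, fold a constant shift-amount vector: SSE2/AVX2 read the whole
  // low 64 bits of the amount operand, so concatenate the sub-elements into a
  // single 64-bit count.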
256 "Unexpected shift-by-scalar type");
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }
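  // (Shift-by-zero and Count >= BitWidth are handled separately.) Splat the
  // concatenated count and emit the matching generic IR shift.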
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
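// In X86TTIImpl::instCombineIntrinsic: a helper that simplifies an operand
// given that only its low DemandedWidth vector elements are demanded.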
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
    return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
  };

  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
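    // BEXTR extracts Length bits starting at bit Shift; both fields come from
    // the low 16 bits of the control operand.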
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Shift = C->getZExtValue();
      uint64_t Length = (Shift >> 8) & 0xff;
      Shift &= 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      // If the length is 0 or the shift is out of range, replace with zero.
      if (Length == 0 || Shift >= BitWidth)
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      // If the LHS is also a constant, we can completely constant-fold this.
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue() >> Shift;
        Result &= maskTrailingOnes<uint64_t>(Length);
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
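    // BZHI zeroes all bits at positions >= Index.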
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      uint64_t Index = C->getZExtValue() & 0xff;
      unsigned BitWidth = II.getType()->getIntegerBitWidth();
      if (Index >= BitWidth)
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));
      if (Index == 0)
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      // If the LHS is also a constant, we can completely constant-fold this.
      if (auto *InC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Result = InC->getZExtValue();
        Result &= maskTrailingOnes<uint64_t>(Index);
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
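    // PEXT gathers the source bits selected by the mask into the low bits of
    // the result.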
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue())
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      if (MaskC->isAllOnesValue())
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // A single contiguous run of ones just selects a bitfield: replace
        // with mask-and-shift IR.
        Value *Input = II.getArgOperand(0);
        Value *Masked = IC.Builder.CreateAnd(Input, II.getArgOperand(1));
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
        Value *Shifted = IC.Builder.CreateLShr(Masked, ShiftAmt);
        return IC.replaceInstUsesWith(II, Shifted);
      }

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        uint64_t BitToSet = 1;
        while (Mask) {
          // Isolate the lowest set bit of the mask.
          uint64_t BitToTest = Mask & -Mask;
          if (BitToTest & Src)
            Result |= BitToSet;
          BitToSet <<= 1;
          // Clear the lowest set bit.
          Mask &= Mask - 1;
        }
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
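    // PDEP scatters the low source bits to the positions selected by the
    // mask.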
    if (auto *MaskC = dyn_cast<ConstantInt>(II.getArgOperand(1))) {
      if (MaskC->isNullValue())
        return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), 0));
      if (MaskC->isAllOnesValue())
        return IC.replaceInstUsesWith(II, II.getArgOperand(0));

      unsigned MaskIdx, MaskLen;
      if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
        // A single contiguous run of ones just places a bitfield: replace
        // with shift-and-mask IR.
        Value *Input = II.getArgOperand(0);
        Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
        Value *Shifted = IC.Builder.CreateShl(Input, ShiftAmt);
        Value *Masked = IC.Builder.CreateAnd(Shifted, II.getArgOperand(1));
        return IC.replaceInstUsesWith(II, Masked);
      }

      if (auto *SrcC = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
        uint64_t Src = SrcC->getZExtValue();
        uint64_t Mask = MaskC->getZExtValue();
        uint64_t Result = 0;
        uint64_t BitToTest = 1;
        while (Mask) {
          // Isolate the lowest set bit of the mask.
          uint64_t BitToSet = Mask & -Mask;
          if (BitToTest & Src)
            Result |= BitToSet;
          BitToTest <<= 1;
          // Clear the lowest set bit.
          Mask &= Mask - 1;
        }
        return IC.replaceInstUsesWith(II,
                                      ConstantInt::get(II.getType(), Result));
      }
    }
    break;
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
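    // These conversions only read the low element of their vector input.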
    Value *Arg = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
    if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
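    // (U)COMI compares only read the low elements; shrink wider inputs.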
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
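    // With the default rounding mode (CUR_DIRECTION == 4), these behave like
    // ordinary IEEE operations and can become generic FP IR.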
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);

        Value *V;
        switch (IID) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
          V = IC.Builder.CreateFAdd(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
          V = IC.Builder.CreateFSub(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
          V = IC.Builder.CreateFMul(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:
          V = IC.Builder.CreateFDiv(Arg0, Arg1);
          break;
        }
        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
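    // Same idea for the masked scalar forms: with rounding mode 4, extract
    // the low elements, do the scalar FP op, apply the mask, and reinsert.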
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        // Extract the low elements as scalars.
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
        Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
        Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);

        Value *V;
        switch (IID) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
          V = IC.Builder.CreateFAdd(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
          V = IC.Builder.CreateFSub(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
          V = IC.Builder.CreateFMul(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:
          V = IC.Builder.CreateFDiv(LHS, RHS);
          break;
        }

        // Handle the masking aspect of the intrinsic.
        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        // We don't need a select if we know the mask bit is a 1.
        if (!C || !C->getValue()[0]) {
          // Cast the mask to an i1 vector and extract the lowest element.
          auto *MaskTy = FixedVectorType::get(
              IC.Builder.getInt1Ty(),
              cast<IntegerType>(Mask->getType())->getBitWidth());
          Mask = IC.Builder.CreateBitCast(Mask, MaskTy);
          Mask = IC.Builder.CreateExtractElement(Mask, (uint64_t)0);
          // Select between the result and the low passthru element.
          Value *Passthru = IC.Builder.CreateExtractElement(
              II.getArgOperand(2), (uint64_t)0);
          V = IC.Builder.CreateSelect(Mask, V, Passthru);
        }

        // Insert the result back into the low element of Arg0.
        V = IC.Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
  case Intrinsic::x86_sse_max_ps:
  case Intrinsic::x86_sse2_max_pd:
  case Intrinsic::x86_avx_max_pd_256:
  case Intrinsic::x86_avx_max_ps_256:
  case Intrinsic::x86_avx512_max_pd_512:
  case Intrinsic::x86_avx512_max_ps_512:
  case Intrinsic::x86_avx512fp16_max_ph_128:
  case Intrinsic::x86_avx512fp16_max_ph_256:
  case Intrinsic::x86_avx512fp16_max_ph_512:
    // ...
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse2_max_sd: {
    // ...
  }
  case Intrinsic::x86_sse_min_ps:
  case Intrinsic::x86_sse2_min_pd:
  case Intrinsic::x86_avx_min_pd_256:
  case Intrinsic::x86_avx_min_ps_256:
  case Intrinsic::x86_avx512_min_pd_512:
  case Intrinsic::x86_avx512_min_ps_512:
  case Intrinsic::x86_avx512fp16_min_ph_128:
  case Intrinsic::x86_avx512fp16_min_ph_256:
  case Intrinsic::x86_avx512fp16_min_ph_512:
    // ...
  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse2_min_sd: {
    // ...
  }
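  // Shift-by-immediate and shift-by-constant-vector both fold through
  // simplifyX86immShift (defined earlier in this file).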
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
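    // Shift-by-scalar also folds via simplifyX86immShift; in addition, only
    // the low 64 bits of the 128-bit amount operand are ever read.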
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    Value *Arg1 = II.getArgOperand(1);
    assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
      return IC.replaceOperand(II, 1, V);
    }
    break;
  }
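  // Per-element (variable) shifts constant-fold via simplifyX86varShift.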
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    if (Value *V = simplifyX86varShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
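  // PACKSS/PACKUS saturate-and-pack; constant inputs fold via simplifyX86pack.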
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, /*IsSigned=*/true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, /*IsSigned=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
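  // Multiply-high folds; the flags select signed/unsigned behaviour and the
  // PMULHRSW rounding mode (helper assumed from the same file, elided above).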
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, /*IsSigned=*/true,
                                    /*IsRounding=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, /*IsSigned=*/false,
                                    /*IsRounding=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512:
    if (Value *V = simplifyX86pmulh(II, IC.Builder, /*IsSigned=*/true,
                                    /*IsRounding=*/true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
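  // PMADDWD / PMADDUBSW fold pairwise multiply-add of constant inputs
  // (helper assumed from the same file, elided above).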
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
    if (Value *V = simplifyX86pmadd(II, IC.Builder, /*IsPMADDWD=*/true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512:
    if (Value *V = simplifyX86pmadd(II, IC.Builder, /*IsPMADDWD=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
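    // Each PCLMULQDQ immediate bit selects the even or odd i64 element of the
    // corresponding source lane; the other element is not demanded.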
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
      unsigned VWidth =
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg0, DemandedElts1, UndefElts1)) {
        IC.replaceOperand(II, 0, V);
        MadeChange = true;
      }

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
      if (Value *V =
              IC.SimplifyDemandedVectorElts(Arg1, DemandedElts2, UndefElts2)) {
        IC.replaceOperand(II, 1, V);
        MadeChange = true;
      }

      if (MadeChange) {
        return &II;
      }
    }
    break;
  }
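  // INSERTPS folds to a shuffle or constant when the control byte allows it.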
  case Intrinsic::x86_sse41_insertps:
    if (Value *V = simplifyX86insertps(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
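  // SSE4A EXTRQ/EXTRQI/INSERTQ/INSERTQI operate on the low 64 bits; constant
  // length/index forms fold to shuffles via file-local helpers.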
  case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 16 && "Unexpected operand sizes");

    // ... (constant folding via simplifyX86extrq)

    // EXTRQ only reads the low 64 bits of Op0 and the low 16 bits of Op1.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_sse4a_extrqi: {
    // EXTRQI: extract Length bits starting at Index from the low 64 bits.
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // ... (constant folding via simplifyX86extrq)

    // EXTRQI only reads the low 64 bits of its source.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // If the upper element of Op1 (the length/index control) is constant,
    // attempt to fold to a constant or shuffle vector.
    if (auto *C1 = dyn_cast<Constant>(Op1)) {
      if (auto *CI11 = dyn_cast_or_null<ConstantInt>(
              C1->getAggregateElement((unsigned)1))) {
        const APInt &V11 = CI11->getValue();
        APInt Len = V11.zextOrTrunc(6);
        APInt Idx = V11.lshr(8).zextOrTrunc(6);
        if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
          return IC.replaceInstUsesWith(II, V);
        }
      }
    }

    // INSERTQ only reads the low 64 bits of the first operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertqi: {
    // INSERTQI: extract the low Length bits of the second source and insert
    // them over the first source starting at bit Index.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 2 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));

    // Attempt to simplify to a constant or shuffle vector.
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
      if (Value *V = simplifyX86insertq(II, Op0, Op1, Len, Idx, IC.Builder)) {
        return IC.replaceInstUsesWith(II, V);
      }
    }

    // INSERTQI only reads the low 64 bits of each operand.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
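    // BLENDV selects each lane from Op1 or Op0 based on the sign bit of the
    // corresponding mask lane; when the mask reduces to a boolean vector this
    // becomes a plain IR select.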
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);

    // ... (trivial folds: blend(A, A, M) -> A, and all-zero/constant masks)
    // Trace the mask's per-lane sign bits back through integer casts where
    // possible (surrounding matching logic abbreviated in this excerpt).
    unsigned BitWidth = Mask->getType()->getScalarSizeInBits();
    if (Mask->getType()->isIntOrIntVectorTy()) {
      // ...
    }
    // Peek through a one-use bitcast from an integer source.
    if (auto *BC = dyn_cast<BitCastOperator>(Mask)) {
      if (BC->hasOneUse()) {
        Value *Src = BC->getOperand(0);
        if (Src->getType()->isIntOrIntVectorTy()) {
          unsigned SrcBitWidth = Src->getType()->getScalarSizeInBits();
          // ...
        }
      }
    }
    auto *MaskTy = cast<FixedVectorType>(Mask->getType());
    auto *OpTy = cast<FixedVectorType>(II.getType());
    if (MaskTy->getScalarSizeInBits() == OpTy->getScalarSizeInBits()) {
      // ...
    }
    // Peek through a one-use shuffle of the mask (VectorCombine may have
    // split a wider blend into narrower intrinsics).
    Value *MaskSrc = nullptr;
    ArrayRef<int> ShuffleMask;
    if (match(Mask, m_OneUse(m_Shuffle(m_Value(MaskSrc), m_Undef(),
                                       m_Mask(ShuffleMask))))) {
      // Bail if the shuffle was irregular or contains undefs.
      int NumElts =
          cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) ||
          any_of(ShuffleMask,
                 [NumElts](int M) { return M < 0 || M >= NumElts; }))
        break;
      Mask = InstCombiner::peekThroughBitcast(MaskSrc);
    }
    // Convert to a vector select if we can bypass casts and find a boolean
    // vector condition value.
    Value *BoolVec;
    if (match(Mask, m_SExt(m_Value(BoolVec))) &&
        BoolVec->getType()->isVectorTy() &&
        BoolVec->getType()->getScalarSizeInBits() == 1) {
      unsigned NumMaskElts = MaskTy->getNumElements();
      unsigned NumOperandElts = OpTy->getNumElements();

      // If we peeked through a shuffle, re-apply it to the boolean vector.
      if (MaskSrc) {
        unsigned NumMaskSrcElts =
            cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
        NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
        // Multiple mask bits map to the same operand element - bail out.
        if (NumMaskElts > NumOperandElts)
          break;
        // ... (rescale the shuffle mask and re-apply it to BoolVec)
      }
      assert(MaskTy->getPrimitiveSizeInBits() ==
                 OpTy->getPrimitiveSizeInBits() &&
             "Not expecting mask and operands with different sizes");
      if (NumMaskElts == NumOperandElts) {
        return SelectInst::Create(BoolVec, Op1, Op0);
      }

      // If the mask has fewer elements, each mask bit covers several operand
      // elements: bitcast the operands to the mask type and select there.
      if (NumMaskElts < NumOperandElts) {
        Value *CastOp0 = IC.Builder.CreateBitCast(Op0, MaskTy);
        Value *CastOp1 = IC.Builder.CreateBitCast(Op1, MaskTy);
        Value *Sel = IC.Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
        return new BitCastInst(Sel, II.getType());
      }
    }
    break;
  }
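  // Variable shuffle/permute intrinsics with a constant selector operand fold
  // to IR shufflevectors via file-local helpers.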
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512: {
    if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512: {
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512: {
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:
    if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_avx512_vpermi2var_d_128:
  case Intrinsic::x86_avx512_vpermi2var_d_256:
  case Intrinsic::x86_avx512_vpermi2var_d_512:
  case Intrinsic::x86_avx512_vpermi2var_hi_128:
  case Intrinsic::x86_avx512_vpermi2var_hi_256:
  case Intrinsic::x86_avx512_vpermi2var_hi_512:
  case Intrinsic::x86_avx512_vpermi2var_pd_128:
  case Intrinsic::x86_avx512_vpermi2var_pd_256:
  case Intrinsic::x86_avx512_vpermi2var_pd_512:
  case Intrinsic::x86_avx512_vpermi2var_ps_128:
  case Intrinsic::x86_avx512_vpermi2var_ps_256:
  case Intrinsic::x86_avx512_vpermi2var_ps_512:
  case Intrinsic::x86_avx512_vpermi2var_q_128:
  case Intrinsic::x86_avx512_vpermi2var_q_256:
  case Intrinsic::x86_avx512_vpermi2var_q_512:
  case Intrinsic::x86_avx512_vpermi2var_qi_128:
  case Intrinsic::x86_avx512_vpermi2var_qi_256:
  case Intrinsic::x86_avx512_vpermi2var_qi_512:
    if (Value *V = simplifyX86vpermv3(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
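  // Masked loads/stores with constant masks simplify to plain memory ops.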
  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:
    if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
      return I;
    }
    break;
  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:
    if (simplifyX86MaskedStore(II, IC)) {
      return nullptr;
    }
    break;
  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:
    if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_avx512_pternlog_d_128:
  case Intrinsic::x86_avx512_pternlog_d_256:
  case Intrinsic::x86_avx512_pternlog_d_512:
  case Intrinsic::x86_avx512_pternlog_q_128:
  case Intrinsic::x86_avx512_pternlog_q_256:
  case Intrinsic::x86_avx512_pternlog_q_512:
    if (Value *V = simplifyTernarylogic(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }

  return std::nullopt;
}
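// X86TTIImpl::simplifyDemandedVectorEltsIntrinsic: called back from
// InstCombine's demanded-elements analysis for target intrinsics.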
std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();
  switch (II.getIntrinsicID()) {
  default:
    break;
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // These zero the upper elements rather than passing them through, so if
    // the low element isn't demanded the result is effectively Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element is used.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // Only the lower element can be undef; the upper elements are zero.
    UndefElts = UndefElts[0];
    break;
  // Unary scalar-as-vector operations that work column-wise.
  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element isn't used, the whole op simplifies to Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }
    break;
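  // Binary scalar-as-vector operations: the upper elements pass through from
  // operand 0; only the low element of operand 1 is read.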
  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element isn't used, the whole op simplifies to Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element is used for operand 1.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    // The lower element is undefined only if both lower elements are; e.g.
    // undef & 0 is known zero, not undef.
    if (!UndefElts2[0])
      UndefElts.clearBit(0);
    break;
  }
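  // ROUNDSS/ROUNDSD: the upper elements come from operand 0 but the low
  // element comes from operand 1.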
  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    // Don't demand the low element of operand 0.
    APInt DemandedElts2 = DemandedElts;
    DemandedElts2.clearBit(0);
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);

    // If the lowest element isn't used, the whole op simplifies to Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element is used for operand 1.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    // Take the high undef elements from operand 0 and the low element from
    // operand 1.
    UndefElts.clearBit(0);
    UndefElts |= UndefElts2[0];
    break;
  }
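  // Three-operand scalar ops: operands 1 and 2 contribute only their low
  // element; the rest passes through from operand 0.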
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element isn't used, the whole op simplifies to Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element is used for operands 1 and 2.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);

    // The lower element is undefined only if all three lower elements are.
    if (!UndefElts2[0] || !UndefElts3[0])
      UndefElts.clearBit(0);
    break;
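  // ADDSUB subtracts in even lanes and adds in odd lanes; if only one parity
  // is demanded it degenerates to a plain fsub/fadd.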
  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {
    APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
    APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
      IRBuilderBase::InsertPointGuard Guard(IC.Builder);
      IC.Builder.SetInsertPoint(&II);
      Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
      return IC.Builder.CreateBinOp(
          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
    }

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
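  // Variable shifts are elementwise: demanded and undef elements map
  // one-to-one through both operands.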
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    // NOTE: mulh(undef, undef) != undef.
    break;
  }
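  // PACK* interleave their two sources per 128-bit lane, so demanded result
  // elements must be translated to per-operand, per-lane demanded elements.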
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
    auto *Ty0 = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;
    // Per lane, pack the elements of the first input and then the second.
    // e.g.
    // v8i16 PACK(v4i32 X, v4i32 Y) - (X[0..3],Y[0..3])
    // v32i8 PACK(v16i16 X, v16i16 Y) - (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
        }
      }

      // Demand elements from the operand.
      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

      // Pack the operand's UNDEF elements, one lane at a time.
      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
      }
    }
    break;
  }
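  // PMADD consumes two source elements per result element, so scale the
  // demanded-elements mask up by two.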
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512: {
    auto *ArgTy = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
    assert((VWidth * 2) == InnerVWidth && "Unexpected input size");

    APInt OpDemandedElts = APIntOps::ScaleBitMask(DemandedElts, InnerVWidth);
    APInt Op0UndefElts(InnerVWidth, 0);
    APInt Op1UndefElts(InnerVWidth, 0);
    simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
    simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
    // NOTE: madd(undef, undef) != undef.
    break;
  }
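  // For variable shuffles, only the selector operand (operand 1) maps
  // elementwise onto the result; the data operand cannot be shrunk.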
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
    break;
  }
  // SSE4A instructions leave the upper 64 bits of the 128-bit result in an
  // undefined state.
  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:
    UndefElts.setHighBits(VWidth / 2);
    break;
  }

  return std::nullopt;
}