// Fold x86 vector shift intrinsics (shift-by-immediate and shift-by-scalar
// forms) into generic IR shifts when the amount is constant or provably
// in/out of range.
static Value *simplifyX86immShift(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder) {
  // Classify the shift: LogicalShift selects lshr/shl rather than ashr, and
  // ShiftLeft selects shl. Only logical shifts may shift left.
  bool LogicalShift = false;
  bool ShiftLeft = false;

  switch (II.getIntrinsicID()) {
  // Arithmetic shift right, by immediate and by (xmm) scalar.
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
    LogicalShift = false;
    ShiftLeft = false;
    break;
  // Logical shift right, by immediate and by (xmm) scalar.
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
    LogicalShift = true;
    ShiftLeft = false;
    break;
  // Logical shift left, by immediate and by (xmm) scalar.
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512:
    LogicalShift = true;
    ShiftLeft = true;
    break;
  }
  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
  Value *Vec = II.getArgOperand(0);
  Value *Amt = II.getArgOperand(1);
  auto *VT = cast<FixedVectorType>(Vec->getType());
  Type *SVT = VT->getElementType();
  Type *AmtVT = Amt->getType();
  unsigned VWidth = VT->getNumElements();
  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
  // Shift-by-immediate: if known bits prove the amount is in range for every
  // lane, replace the intrinsic with a generic IR shift of a splatted amount.
  KnownBits KnownAmtBits = llvm::computeKnownBits(Amt, II.getDataLayout());
  if (KnownAmtBits.getMaxValue().ult(BitWidth)) {
    Amt = Builder.CreateZExtOrTrunc(Amt, SVT);
    Amt = Builder.CreateVectorSplat(VWidth, Amt);
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }
  // If the amount is known to be out of range, logical shifts fold to zero
  // and arithmetic shifts clamp to BitWidth - 1.
  if (KnownAmtBits.getMinValue().uge(BitWidth)) {
    if (LogicalShift)
      return ConstantAggregateZero::get(VT);
    Amt = ConstantInt::get(SVT, BitWidth - 1);
    return Builder.CreateAShr(Vec, Builder.CreateVectorSplat(VWidth, Amt));
  }
  // Shift-by-scalar: the hardware reads the whole low 64 bits of the count
  // vector, so the fold is legal only if the first element is in range and
  // the remaining elements of the lower half are known zero.
  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");
  unsigned NumAmtElts = cast<FixedVectorType>(AmtVT)->getNumElements();
  APInt DemandedLower = APInt::getOneBitSet(NumAmtElts, 0);
  APInt DemandedUpper = APInt::getBitsSet(NumAmtElts, 1, NumAmtElts / 2);
  KnownBits KnownLowerBits =
      llvm::computeKnownBits(Amt, DemandedLower, II.getDataLayout());
  KnownBits KnownUpperBits =
      llvm::computeKnownBits(Amt, DemandedUpper, II.getDataLayout());
  if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
      (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
    SmallVector<int, 16> ZeroSplat(VWidth, 0);
    Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
    return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
                                      : Builder.CreateLShr(Vec, Amt))
                         : Builder.CreateAShr(Vec, Amt));
  }
  // Simplify if the shift amount is a constant vector.
  auto *CDV = dyn_cast<ConstantDataVector>(Amt);
  if (!CDV)
    return nullptr;

  // SSE2/AVX2 uses all of the first 64 bits of the 128-bit vector operand to
  // compute the shift amount.
  assert(AmtVT->isVectorTy() && AmtVT->getPrimitiveSizeInBits() == 128 &&
         cast<VectorType>(AmtVT)->getElementType() == SVT &&
         "Unexpected shift-by-scalar type");

  // Concatenate the sub-elements to create the 64-bit count value.
  APInt Count(64, 0);
  for (unsigned i = 0, NumSubElts = 64 / BitWidth; i != NumSubElts; ++i) {
    unsigned SubEltIdx = (NumSubElts - 1) - i;
    auto *SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
    Count <<= BitWidth;
    Count |= SubElt->getValue().zextOrTrunc(64);
  }
  // (Zero and out-of-range counts are handled above: shift-by-zero returns
  // Vec unchanged, out-of-range logical shifts fold to zero, and arithmetic
  // shifts clamp the count to BitWidth - 1.)
  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);

  if (ShiftLeft)
    return Builder.CreateShl(Vec, ShiftVec);

  if (LogicalShift)
    return Builder.CreateLShr(Vec, ShiftVec);

  return Builder.CreateAShr(Vec, ShiftVec);
}
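// Illustrative example (hypothetical IR, not taken from this file): with an
// in-range constant amount,
//   %r = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 3)
// lowers to the generic shift
//   %r = ashr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>
// which downstream IR passes understand directly.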
// --- Per-intrinsic IR folds (the instCombineIntrinsic hook) ---

  // Helper that demands only the lowest DemandedWidth elements of Op, letting
  // the upper elements be simplified away.
  auto SimplifyDemandedVectorEltsLow = [&IC](Value *Op, unsigned Width,
                                             unsigned DemandedWidth) {
    APInt UndefElts(Width, 0);
    APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
    return IC.SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
  };
  switch (II.getIntrinsicID()) {
  // BEXTR/BEXTRI: constant-fold bit-field extracts when the control (and
  // optionally the source) operand is constant.
  case Intrinsic::x86_bmi_bextr_32:
  case Intrinsic::x86_bmi_bextr_64:
  case Intrinsic::x86_tbm_bextri_u32:
  case Intrinsic::x86_tbm_bextri_u64:
    // ... (decode Shift/Length from the constant control operand)
    unsigned BitWidth = II.getType()->getIntegerBitWidth();
    // ... (fold to zero if Length == 0 or Shift >= BitWidth)
    uint64_t Result = InC->getZExtValue() >> Shift;
    // ... (mask Result down to Length bits)
    return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
    // ...
    break;
  // BZHI: zero the bits from (constant) Index upward; fully constant-fold
  // when the source is constant too.
  case Intrinsic::x86_bmi_bzhi_32:
  case Intrinsic::x86_bmi_bzhi_64:
    // ...
    uint64_t Index = C->getZExtValue() & 0xff;
    unsigned BitWidth = II.getType()->getIntegerBitWidth();
    // ... (Index >= BitWidth leaves the source unchanged; Index == 0 gives 0)
    uint64_t Result = InC->getZExtValue();
    // ... (clear the bits from Index upward)
    return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
    // ...
    break;
  // PEXT: a zero mask folds to zero, an all-ones mask folds to the source,
  // and a single contiguous (shifted) mask is just a mask-and-shift.
  case Intrinsic::x86_bmi_pext_32:
  case Intrinsic::x86_bmi_pext_64:
    // ... (requires a constant mask operand, MaskC)
    if (MaskC->isNullValue()) {
      // ... (fold to zero)
    }
    if (MaskC->isAllOnesValue()) {
      // ... (fold to the source operand)
    }

    unsigned MaskIdx, MaskLen;
    if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
      // A single contiguous run of ones selects a bit-field that PEXT moves
      // down to bit 0: an and plus an lshr in plain IR.
      Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
      // ... (CreateAnd with the mask, then CreateLShr by ShiftAmt)
    }

    // Fully constant-fold bit by bit when the source is constant as well.
    uint64_t Src = SrcC->getZExtValue();
    uint64_t Mask = MaskC->getZExtValue();
    // ... (walk the set bits of Mask, testing the matching source bit)
    if (BitToTest & Src)
      // ... (set the next result bit)
    // ...
    return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
    // ...
    break;
  // PDEP is the inverse: it scatters low source bits to the mask positions.
  // Same special cases; a single shifted mask becomes shl + and.
  case Intrinsic::x86_bmi_pdep_32:
  case Intrinsic::x86_bmi_pdep_64:
    // ... (requires a constant mask operand, MaskC)
    if (MaskC->isNullValue()) {
      // ... (fold to zero)
    }
    if (MaskC->isAllOnesValue()) {
      // ... (fold to the source operand)
    }

    unsigned MaskIdx, MaskLen;
    if (MaskC->getValue().isShiftedMask(MaskIdx, MaskLen)) {
      Value *ShiftAmt = ConstantInt::get(II.getType(), MaskIdx);
      // ... (CreateShl by ShiftAmt, then CreateAnd with the mask)
    }

    // Fully constant-fold bit by bit when the source is constant as well.
    uint64_t Src = SrcC->getZExtValue();
    uint64_t Mask = MaskC->getZExtValue();
    // ... (walk the set bits of Mask, consuming source bits from bit 0 up)
    if (BitToTest & Src)
      // ... (set the current mask bit in the result)
    // ...
    return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
    // ...
    break;
  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64:
  case Intrinsic::x86_avx512_vcvtss2si32:
  case Intrinsic::x86_avx512_vcvtss2si64:
  case Intrinsic::x86_avx512_vcvtss2usi32:
  case Intrinsic::x86_avx512_vcvtss2usi64:
  case Intrinsic::x86_avx512_vcvtsd2si32:
  case Intrinsic::x86_avx512_vcvtsd2si64:
  case Intrinsic::x86_avx512_vcvtsd2usi32:
  case Intrinsic::x86_avx512_vcvtsd2usi64:
  case Intrinsic::x86_avx512_cvttss2si:
  case Intrinsic::x86_avx512_cvttss2si64:
  case Intrinsic::x86_avx512_cvttss2usi:
  case Intrinsic::x86_avx512_cvttss2usi64:
  case Intrinsic::x86_avx512_cvttsd2si:
  case Intrinsic::x86_avx512_cvttsd2si64:
  case Intrinsic::x86_avx512_cvttsd2usi:
  case Intrinsic::x86_avx512_cvttsd2usi64: {
    // These intrinsics only demand the lowest element of their input vector;
    // simplify the input based on that if possible.
    Value *Arg = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Arg->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  // MOVMSK extracts the per-element sign bits into a scalar mask; fold it
  // when the sign bits can be determined (simplifyX86movmsk).
  case Intrinsic::x86_mmx_pmovmskb:
  case Intrinsic::x86_sse_movmsk_ps:
  case Intrinsic::x86_sse2_movmsk_pd:
  case Intrinsic::x86_sse2_pmovmskb_128:
  case Intrinsic::x86_avx_movmsk_pd_256:
  case Intrinsic::x86_avx_movmsk_ps_256:
  case Intrinsic::x86_avx2_pmovmskb:
    if (Value *V = simplifyX86movmsk(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomineq_sd:
  case Intrinsic::x86_avx512_vcomi_ss:
  case Intrinsic::x86_avx512_vcomi_sd:
  case Intrinsic::x86_avx512_mask_cmp_ss:
  case Intrinsic::x86_avx512_mask_cmp_sd: {
    // These scalar compares only read the lowest element of each operand.
    bool MadeChange = false;
    Value *Arg0 = II.getArgOperand(0);
    Value *Arg1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Arg0->getType())->getNumElements();
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  // Packed AVX-512 FP ops carry a rounding-mode operand; when it is
  // CUR_DIRECTION (4) they behave exactly like the plain IR operations.
  case Intrinsic::x86_avx512_add_ps_512:
  case Intrinsic::x86_avx512_div_ps_512:
  case Intrinsic::x86_avx512_mul_ps_512:
  case Intrinsic::x86_avx512_sub_ps_512:
  case Intrinsic::x86_avx512_add_pd_512:
  case Intrinsic::x86_avx512_div_pd_512:
  case Intrinsic::x86_avx512_mul_pd_512:
  case Intrinsic::x86_avx512_sub_pd_512:
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      if (R->getValue() == 4) {
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);

        Value *V;
        switch (II.getIntrinsicID()) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_add_ps_512:
        case Intrinsic::x86_avx512_add_pd_512:
          V = IC.Builder.CreateFAdd(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_sub_ps_512:
        case Intrinsic::x86_avx512_sub_pd_512:
          V = IC.Builder.CreateFSub(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_mul_ps_512:
        case Intrinsic::x86_avx512_mul_pd_512:
          V = IC.Builder.CreateFMul(Arg0, Arg1);
          break;
        case Intrinsic::x86_avx512_div_ps_512:
        case Intrinsic::x86_avx512_div_pd_512:
          V = IC.Builder.CreateFDiv(Arg0, Arg1);
          break;
        }
        return IC.replaceInstUsesWith(II, V);
      }
    }
    break;
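  // Illustrative example (hypothetical IR): with rounding operand 4,
  //   %r = call <16 x float> @llvm.x86.avx512.add.ps.512(
  //            <16 x float> %a, <16 x float> %b, i32 4)
  // is simply %r = fadd <16 x float> %a, %b.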
  // Masked scalar AVX-512 FP ops with a rounding operand: with CUR_DIRECTION
  // (4) they reduce to a scalar IR op on the low elements plus a mask select.
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
    if (auto *R = dyn_cast<ConstantInt>(II.getArgOperand(4))) {
      if (R->getValue() == 4) {
        // Extract the low elements as scalars.
        Value *Arg0 = II.getArgOperand(0);
        Value *Arg1 = II.getArgOperand(1);
        Value *LHS = IC.Builder.CreateExtractElement(Arg0, (uint64_t)0);
        Value *RHS = IC.Builder.CreateExtractElement(Arg1, (uint64_t)0);

        Value *V;
        switch (II.getIntrinsicID()) {
        default:
          llvm_unreachable("Case stmts out of sync!");
        case Intrinsic::x86_avx512_mask_add_ss_round:
        case Intrinsic::x86_avx512_mask_add_sd_round:
          V = IC.Builder.CreateFAdd(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_sub_ss_round:
        case Intrinsic::x86_avx512_mask_sub_sd_round:
          V = IC.Builder.CreateFSub(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_mul_ss_round:
        case Intrinsic::x86_avx512_mask_mul_sd_round:
          V = IC.Builder.CreateFMul(LHS, RHS);
          break;
        case Intrinsic::x86_avx512_mask_div_ss_round:
        case Intrinsic::x86_avx512_mask_div_sd_round:
          V = IC.Builder.CreateFDiv(LHS, RHS);
          break;
        }

        // Handle the masking aspect of the intrinsic.
        Value *Mask = II.getArgOperand(3);
        auto *C = dyn_cast<ConstantInt>(Mask);
        // We don't need a select if we know the mask bit is a 1.
        if (!C || !C->getValue()[0]) {
          // ... (cast the mask to <N x i1>, extract bit 0, and select against
          // the low element of the passthru operand)
        }
        // ... (insert the scalar result back into Arg0 and replace all uses)
      }
    }
    break;
  // Shift-by-immediate intrinsics: fold constant or known-range amounts via
  // simplifyX86immShift (defined earlier in this file).
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_avx2_psrai_d:
  case Intrinsic::x86_avx2_psrai_w:
  case Intrinsic::x86_avx512_psrai_q_128:
  case Intrinsic::x86_avx512_psrai_q_256:
  case Intrinsic::x86_avx512_psrai_d_512:
  case Intrinsic::x86_avx512_psrai_q_512:
  case Intrinsic::x86_avx512_psrai_w_512:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w:
  case Intrinsic::x86_avx512_psrli_d_512:
  case Intrinsic::x86_avx512_psrli_q_512:
  case Intrinsic::x86_avx512_psrli_w_512:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_avx512_pslli_d_512:
  case Intrinsic::x86_avx512_pslli_q_512:
  case Intrinsic::x86_avx512_pslli_w_512:
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse2_psra_d:
  case Intrinsic::x86_sse2_psra_w:
  case Intrinsic::x86_avx2_psra_d:
  case Intrinsic::x86_avx2_psra_w:
  case Intrinsic::x86_avx512_psra_q_128:
  case Intrinsic::x86_avx512_psra_q_256:
  case Intrinsic::x86_avx512_psra_d_512:
  case Intrinsic::x86_avx512_psra_q_512:
  case Intrinsic::x86_avx512_psra_w_512:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx512_psrl_d_512:
  case Intrinsic::x86_avx512_psrl_q_512:
  case Intrinsic::x86_avx512_psrl_w_512:
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx512_psll_d_512:
  case Intrinsic::x86_avx512_psll_q_512:
  case Intrinsic::x86_avx512_psll_w_512: {
    // Shift-by-(xmm)scalar forms: first try the same fold as the immediate
    // forms.
    if (Value *V = simplifyX86immShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }

    // SSE2/AVX2 uses only the first 64 bits of the 128-bit vector operand to
    // compute the shift amount.
    Value *Arg1 = II.getArgOperand(1);
    assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
           "Unexpected packed shift size");
    unsigned VWidth = cast<FixedVectorType>(Arg1->getType())->getNumElements();

    if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
      return IC.replaceOperand(II, 1, V);
    }
    break;
  }
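  // Example: for psrl.d the count operand is a full <4 x i32>, but the
  // hardware reads only its low 64 bits, so writes to elements 2 and 3 of
  // the count are dead and SimplifyDemandedVectorEltsLow can strip them.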
  // Per-element variable shifts: fold constant or uniform counts via
  // simplifyX86varShift.
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx512_psllv_d_512:
  case Intrinsic::x86_avx512_psllv_q_512:
  case Intrinsic::x86_avx512_psllv_w_128:
  case Intrinsic::x86_avx512_psllv_w_256:
  case Intrinsic::x86_avx512_psllv_w_512:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256:
  case Intrinsic::x86_avx512_psrav_q_128:
  case Intrinsic::x86_avx512_psrav_q_256:
  case Intrinsic::x86_avx512_psrav_d_512:
  case Intrinsic::x86_avx512_psrav_q_512:
  case Intrinsic::x86_avx512_psrav_w_128:
  case Intrinsic::x86_avx512_psrav_w_256:
  case Intrinsic::x86_avx512_psrav_w_512:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx512_psrlv_d_512:
  case Intrinsic::x86_avx512_psrlv_q_512:
  case Intrinsic::x86_avx512_psrlv_w_128:
  case Intrinsic::x86_avx512_psrlv_w_256:
  case Intrinsic::x86_avx512_psrlv_w_512:
    if (Value *V = simplifyX86varShift(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  // Saturating packs: constant-fold via simplifyX86pack (signed variants).
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, /*IsSigned=*/true)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;

  // Unsigned saturating packs.
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512:
    if (Value *V = simplifyX86pack(II, IC.Builder, /*IsSigned=*/false)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  // High-half multiplies: pmulh(u)w and the rounding pmulhrsw variants can be
  // constant folded (via the pmulh helper in this file).
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
    // ... (signed, non-rounding fold)
    break;
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
    // ... (unsigned, non-rounding fold)
    break;
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512:
    // ... (signed, rounding fold)
    break;
  // Horizontal multiply-accumulate (pmaddwd / pmaddubsw): constant foldable.
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
    // ... (fold via the pmadd helper)
    break;
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512:
    // ... (fold via the pmadd helper)
    break;
  // Carry-less multiply: the immediate selects which 64-bit half of each
  // operand is used, so the unused halves can be demanded away.
  case Intrinsic::x86_pclmulqdq:
  case Intrinsic::x86_pclmulqdq_256:
  case Intrinsic::x86_pclmulqdq_512: {
    if (auto *C = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
      unsigned Imm = C->getZExtValue();

      bool MadeChange = false;
      Value *Arg0 = II.getArgOperand(0);
      Value *Arg1 = II.getArgOperand(1);
      unsigned VWidth =
          cast<FixedVectorType>(Arg0->getType())->getNumElements();

      APInt UndefElts1(VWidth, 0);
      APInt DemandedElts1 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x01) ? 2 : 1));
      // ... (simplify Arg0 against DemandedElts1)

      APInt UndefElts2(VWidth, 0);
      APInt DemandedElts2 =
          APInt::getSplat(VWidth, APInt(2, (Imm & 0x10) ? 2 : 1));
      // ... (simplify Arg1 against DemandedElts2, then return &II if changed)
    }
    break;
  }
  // INSERTPS: with a constant control byte this becomes a shuffle/zeroing
  // pattern (simplifyX86insertps).
  case Intrinsic::x86_sse41_insertps:
    if (Value *V = simplifyX86insertps(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  case Intrinsic::x86_sse4a_extrq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 16 && "Unexpected operand sizes");

    // ... (attempt to simplify to a constant, shuffle vector or EXTRQI call
    // when the length/index fields of Op1 are constant)

    // EXTRQ only uses the lowest 64 bits of the first operand and the lowest
    // 16 bits of the second.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  case Intrinsic::x86_sse4a_extrqi: {
    // EXTRQI: extract Length bits starting at Index, zero-padding the rest of
    // the lower 64 bits; the upper 64 bits are undefined.
    Value *Op0 = II.getArgOperand(0);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // ... (fold to a constant or shuffle when Length/Index are constants)

    // EXTRQI only uses the lowest 64 bits of the first operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertq: {
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth = cast<FixedVectorType>(Op0->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
           "Unexpected operand size");

    // If element 1 of Op1 is a constant, decode its length and index fields
    // and try to fold to a constant or an INSERTQI call.
    // ...
    const APInt &V11 = CI11->getValue();
    // ...

    // INSERTQ only uses the lowest 64 bits of the first operand.
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
      return IC.replaceOperand(II, 0, V);
    }
    break;
  }
  case Intrinsic::x86_sse4a_insertqi: {
    // INSERTQI: extract the lowest Length bits of the lower half of Op1 and
    // insert them into Op0 starting at bit Index; the upper 64 bits of the
    // result are undefined.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    unsigned VWidth0 = cast<FixedVectorType>(Op0->getType())->getNumElements();
    unsigned VWidth1 = cast<FixedVectorType>(Op1->getType())->getNumElements();
    assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
           Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
           VWidth1 == 2 && "Unexpected operand sizes");

    // See if we're dealing with constant values.
    auto *CILength = dyn_cast<ConstantInt>(II.getArgOperand(2));
    auto *CIIndex = dyn_cast<ConstantInt>(II.getArgOperand(3));

    // Attempt to simplify to a constant or shuffle vector.
    if (CILength && CIIndex) {
      APInt Len = CILength->getValue().zextOrTrunc(6);
      APInt Idx = CIIndex->getValue().zextOrTrunc(6);
      // ... (fold via the insertq helper when Len/Idx are in range)
    }

    // INSERTQI only uses the lowest 64 bits of the first two operands.
    bool MadeChange = false;
    if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
      IC.replaceOperand(II, 0, V);
      MadeChange = true;
    }
    if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
      IC.replaceOperand(II, 1, V);
      MadeChange = true;
    }
    if (MadeChange) {
      return &II;
    }
    break;
  }
  // BLENDV selects lanes by the sign bit of the mask; try to rewrite it as a
  // plain IR select on a boolean vector.
  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // fold (blend A, A, Mask) -> A.
    Value *Op0 = II.getArgOperand(0);
    Value *Op1 = II.getArgOperand(1);
    Value *Mask = II.getArgOperand(2);
    // ... (identical operands, zero masks, and constant masks are folded
    // directly; then casts on the mask are peeked through)

    // Peek through a one-use shuffle of the mask.
    Value *MaskSrc = nullptr;
    ArrayRef<int> ShuffleMask;
    if (match(Mask, m_OneUse(m_Shuffle(m_Value(MaskSrc), m_Undef(),
                                       m_Mask(ShuffleMask))))) {
      // Bail if the shuffle was irregular or contains undefs.
      int NumElts =
          cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
      if (NumElts < (int)ShuffleMask.size() || !isPowerOf2_32(NumElts) ||
          any_of(ShuffleMask,
                 [NumElts](int M) { return M < 0 || M >= NumElts; }))
        break;
      // ... (continue the analysis on the shuffle's source)
    }

    // Convert to a vector select if we can bypass casts and find a boolean
    // vector condition value (i.e. the mask is a sign-extended <N x i1>).
    Value *BoolVec;
    if (match(Mask, m_SExt(m_Value(BoolVec))) &&
        BoolVec->getType()->isVectorTy() &&
        BoolVec->getType()->getScalarSizeInBits() == 1) {
      auto *MaskTy = cast<FixedVectorType>(Mask->getType());
      auto *OpTy = cast<FixedVectorType>(II.getType());
      unsigned NumMaskElts = MaskTy->getNumElements();
      unsigned NumOperandElts = OpTy->getNumElements();

      // If we peeked through a shuffle, rescale the mask element count.
      if (MaskSrc) {
        unsigned NumMaskSrcElts =
            cast<FixedVectorType>(MaskSrc->getType())->getNumElements();
        NumMaskElts = (ShuffleMask.size() * NumMaskElts) / NumMaskSrcElts;
        // Multiple mask bits mapping to the same operand element - bail out.
        if (NumMaskElts > NumOperandElts)
          break;
        // ... (reapply the scaled shuffle to the boolean vector)
      }
      assert(MaskTy->getPrimitiveSizeInBits() ==
                 OpTy->getPrimitiveSizeInBits() &&
             "Not expecting mask and operands with different sizes");

      if (NumMaskElts == NumOperandElts) {
        return SelectInst::Create(BoolVec, Op1, Op0);
      }
      // If the mask has fewer elements than the operands, each mask bit maps
      // to multiple operand elements: bitcast the operands to the mask type,
      // select, and bitcast back.
      if (NumMaskElts < NumOperandElts) {
        // ...
      }
    }
    break;
  }
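  // Illustrative sketch (hypothetical IR): if the mask is (a bitcast of)
  // sext <4 x i1> %c, a blendvps of %a and %b becomes
  //   %r = select <4 x i1> %c, <4 x float> %b, <4 x float> %a
  // Note the operand order: blendv picks the *second* operand where the mask
  // sign bit is set.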
  // PSHUFB with a constant control becomes a shufflevector.
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512: {
    if (Value *V = simplifyX86pshufb(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
  // VPERMILVAR with a constant control becomes a shufflevector.
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512: {
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512: {
    if (Value *V = simplifyX86vpermilvar(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }
  // Cross-lane permutes (VPERMD/VPERMPS and the AVX-512 VPERMVAR forms): a
  // constant selector becomes a shufflevector.
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps:
  case Intrinsic::x86_avx512_permvar_df_256:
  case Intrinsic::x86_avx512_permvar_df_512:
  case Intrinsic::x86_avx512_permvar_di_256:
  case Intrinsic::x86_avx512_permvar_di_512:
  case Intrinsic::x86_avx512_permvar_hi_128:
  case Intrinsic::x86_avx512_permvar_hi_256:
  case Intrinsic::x86_avx512_permvar_hi_512:
  case Intrinsic::x86_avx512_permvar_qi_128:
  case Intrinsic::x86_avx512_permvar_qi_256:
  case Intrinsic::x86_avx512_permvar_qi_512:
  case Intrinsic::x86_avx512_permvar_sf_512:
  case Intrinsic::x86_avx512_permvar_si_512:
    if (Value *V = simplifyX86vpermv(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  // Two-source permutes (VPERMI2/VPERMT2): a constant selector likewise folds
  // to a shufflevector of the two data operands.
  case Intrinsic::x86_avx512_vpermi2var_d_128:
  case Intrinsic::x86_avx512_vpermi2var_d_256:
  case Intrinsic::x86_avx512_vpermi2var_d_512:
  case Intrinsic::x86_avx512_vpermi2var_hi_128:
  case Intrinsic::x86_avx512_vpermi2var_hi_256:
  case Intrinsic::x86_avx512_vpermi2var_hi_512:
  case Intrinsic::x86_avx512_vpermi2var_pd_128:
  case Intrinsic::x86_avx512_vpermi2var_pd_256:
  case Intrinsic::x86_avx512_vpermi2var_pd_512:
  case Intrinsic::x86_avx512_vpermi2var_ps_128:
  case Intrinsic::x86_avx512_vpermi2var_ps_256:
  case Intrinsic::x86_avx512_vpermi2var_ps_512:
  case Intrinsic::x86_avx512_vpermi2var_q_128:
  case Intrinsic::x86_avx512_vpermi2var_q_256:
  case Intrinsic::x86_avx512_vpermi2var_q_512:
  case Intrinsic::x86_avx512_vpermi2var_qi_128:
  case Intrinsic::x86_avx512_vpermi2var_qi_256:
  case Intrinsic::x86_avx512_vpermi2var_qi_512:
    // ... (fold via the two-source permute helper)
    break;
  // Masked loads: a zero mask yields the zero/passthru value and an all-ones
  // mask becomes a plain load (simplifyX86MaskedLoad).
  case Intrinsic::x86_avx_maskload_ps:
  case Intrinsic::x86_avx_maskload_pd:
  case Intrinsic::x86_avx_maskload_ps_256:
  case Intrinsic::x86_avx_maskload_pd_256:
  case Intrinsic::x86_avx2_maskload_d:
  case Intrinsic::x86_avx2_maskload_q:
  case Intrinsic::x86_avx2_maskload_d_256:
  case Intrinsic::x86_avx2_maskload_q_256:
    if (Instruction *I = simplifyX86MaskedLoad(II, IC)) {
      return I;
    }
    break;
  // Masked stores: a zero mask is a no-op and an all-ones mask is a plain
  // store (simplifyX86MaskedStore).
  case Intrinsic::x86_sse2_maskmov_dqu:
  case Intrinsic::x86_avx_maskstore_ps:
  case Intrinsic::x86_avx_maskstore_pd:
  case Intrinsic::x86_avx_maskstore_ps_256:
  case Intrinsic::x86_avx_maskstore_pd_256:
  case Intrinsic::x86_avx2_maskstore_d:
  case Intrinsic::x86_avx2_maskstore_q:
  case Intrinsic::x86_avx2_maskstore_d_256:
  case Intrinsic::x86_avx2_maskstore_q_256:
    if (simplifyX86MaskedStore(II, IC)) {
      return nullptr;
    }
    break;
  // ADDCARRY: fold away when the carry can be proven (simplifyX86addcarry).
  case Intrinsic::x86_addcarry_32:
  case Intrinsic::x86_addcarry_64:
    if (Value *V = simplifyX86addcarry(II, IC.Builder)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  // PTERNLOG: ternary logic with a constant truth-table immediate can be
  // folded to plain logic operations.
  case Intrinsic::x86_avx512_pternlog_d_128:
  case Intrinsic::x86_avx512_pternlog_d_256:
  case Intrinsic::x86_avx512_pternlog_d_512:
  case Intrinsic::x86_avx512_pternlog_q_128:
  case Intrinsic::x86_avx512_pternlog_q_256:
  case Intrinsic::x86_avx512_pternlog_q_512:
    // ... (fold via the ternary-logic helper)
    break;
  }

  return std::nullopt;
}
// --- Demanded-vector-elements hook: given which result lanes are demanded,
// simplify the operands and report which result lanes are undef ---

std::optional<Value *> X86TTIImpl::simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
    APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        simplifyAndSetOp) const {
  unsigned VWidth = cast<FixedVectorType>(II.getType())->getNumElements();

  switch (II.getIntrinsicID()) {
  // XOP VFRCZ zeroes the upper result elements rather than passing them
  // through, so the call can't simply be replaced by Arg0 when lane 0 is
  // dead.
  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    if (!DemandedElts[0]) {
      // ... (keep the intrinsic anyway)
    }
    // Only the lower element of the operand is used.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    // Only the lower element is undefined; the high elements are zero.
    UndefElts = UndefElts[0];
    break;
  // Scalar ops that pass the upper elements of operand 0 through: if the low
  // element isn't demanded, the whole intrinsic is just Arg0.
  case Intrinsic::x86_sse_rcp_ss:
  case Intrinsic::x86_sse_rsqrt_ss:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element of a scalar op isn't used then use Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }
    break;
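  // Example: if
  //   %r = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %v)
  // is followed only by extracts of lanes 1..3, lane 0 is never demanded and
  // the call is replaced by %v, since lanes 1..3 pass through unchanged.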
  // Scalar min/max/cmp: upper elements of operand 0 pass through; only the
  // low element of operand 1 is read.
  case Intrinsic::x86_sse_min_ss:
  case Intrinsic::x86_sse_max_ss:
  case Intrinsic::x86_sse_cmp_ss:
  case Intrinsic::x86_sse2_min_sd:
  case Intrinsic::x86_sse2_max_sd:
  case Intrinsic::x86_sse2_cmp_sd: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element of a scalar op isn't used then use Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element is used for operand 1.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    // ... (the low result lane is undef only if both low input lanes are;
    // consider undef & 0, which is known zero, not undef)
    break;
  }
  // ROUNDSS/ROUNDSD: the upper result elements come from operand 0 and only
  // the low element of operand 1 is rounded, so the low element of operand 0
  // is never read.
  case Intrinsic::x86_sse41_round_ss:
  case Intrinsic::x86_sse41_round_sd: {
    // Don't demand the low element of operand 0.
    APInt DemandedElts2 = DemandedElts;
    DemandedElts2.clearBit(0);
    simplifyAndSetOp(&II, 0, DemandedElts2, UndefElts);

    // If the lowest element of a scalar op isn't used then use Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element is used for operand 1.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);

    // High undef elements come from operand 0, the low one from operand 1.
    UndefElts.clearBit(0);
    UndefElts |= UndefElts2[0];
    break;
  }
  // Masked scalar AVX-512 FP ops: the same low-element story, but operands 1
  // and 2 (second source and passthru) both contribute only lane 0.
  case Intrinsic::x86_avx512_mask_add_ss_round:
  case Intrinsic::x86_avx512_mask_div_ss_round:
  case Intrinsic::x86_avx512_mask_mul_ss_round:
  case Intrinsic::x86_avx512_mask_sub_ss_round:
  case Intrinsic::x86_avx512_mask_max_ss_round:
  case Intrinsic::x86_avx512_mask_min_ss_round:
  case Intrinsic::x86_avx512_mask_add_sd_round:
  case Intrinsic::x86_avx512_mask_div_sd_round:
  case Intrinsic::x86_avx512_mask_mul_sd_round:
  case Intrinsic::x86_avx512_mask_sub_sd_round:
  case Intrinsic::x86_avx512_mask_max_sd_round:
  case Intrinsic::x86_avx512_mask_min_sd_round:
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);

    // If the lowest element of a scalar op isn't used then use Arg0.
    if (!DemandedElts[0]) {
      IC.addToWorklist(&II);
      return II.getArgOperand(0);
    }

    // Only the lower element is used for operands 1 and 2.
    DemandedElts = 1;
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    simplifyAndSetOp(&II, 2, DemandedElts, UndefElts3);

    // The lower element is undefined only if all three lower elements are
    // undefined. Consider undef & 0: the result is known zero, not undef.
    if (!UndefElts2[0] || !UndefElts3[0])
      UndefElts.clearBit(0);
    break;
  // ADDSUB subtracts in even lanes and adds in odd lanes; if only one parity
  // is demanded, it degenerates to a plain fsub/fadd.
  case Intrinsic::x86_sse3_addsub_pd:
  case Intrinsic::x86_sse3_addsub_ps:
  case Intrinsic::x86_avx_addsub_pd_256:
  case Intrinsic::x86_avx_addsub_ps_256: {
    APInt SubMask = APInt::getSplat(VWidth, APInt(2, 0x1));
    APInt AddMask = APInt::getSplat(VWidth, APInt(2, 0x2));
    bool IsSubOnly = DemandedElts.isSubsetOf(SubMask);
    bool IsAddOnly = DemandedElts.isSubsetOf(AddMask);
    if (IsSubOnly || IsAddOnly) {
      assert((IsSubOnly ^ IsAddOnly) && "Can't be both add-only and sub-only");
      // ... (set the builder insert point to II)
      Value *Arg0 = II.getArgOperand(0), *Arg1 = II.getArgOperand(1);
      return IC.Builder.CreateBinOp(
          IsSubOnly ? Instruction::FSub : Instruction::FAdd, Arg0, Arg1);
    }

    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
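  // Example: addsub.ps computes { a0-b0, a1+b1, a2-b2, a3+b3 }. If only
  // lanes 1 and 3 are demanded, DemandedElts is a subset of AddMask (0b1010)
  // and the whole call degenerates to a single fadd.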
  // Per-element variable shifts: each result lane reads only the matching
  // lanes of both operands.
  case Intrinsic::x86_avx2_psllv_d:
  case Intrinsic::x86_avx2_psllv_d_256:
  case Intrinsic::x86_avx2_psllv_q:
  case Intrinsic::x86_avx2_psllv_q_256:
  case Intrinsic::x86_avx2_psrlv_d:
  case Intrinsic::x86_avx2_psrlv_d_256:
  case Intrinsic::x86_avx2_psrlv_q:
  case Intrinsic::x86_avx2_psrlv_q_256:
  case Intrinsic::x86_avx2_psrav_d:
  case Intrinsic::x86_avx2_psrav_d_256: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    UndefElts &= UndefElts2;
    break;
  }
  // High-half multiplies are element-wise, so the demanded mask propagates
  // to both operands unchanged.
  case Intrinsic::x86_sse2_pmulh_w:
  case Intrinsic::x86_avx2_pmulh_w:
  case Intrinsic::x86_avx512_pmulh_w_512:
  case Intrinsic::x86_sse2_pmulhu_w:
  case Intrinsic::x86_avx2_pmulhu_w:
  case Intrinsic::x86_avx512_pmulhu_w_512:
  case Intrinsic::x86_ssse3_pmul_hr_sw_128:
  case Intrinsic::x86_avx2_pmul_hr_sw:
  case Intrinsic::x86_avx512_pmul_hr_sw_512: {
    simplifyAndSetOp(&II, 0, DemandedElts, UndefElts);
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts2);
    // NOTE: mulh(undef, undef) != undef.
    break;
  }
  case Intrinsic::x86_sse2_packssdw_128:
  case Intrinsic::x86_sse2_packsswb_128:
  case Intrinsic::x86_sse2_packuswb_128:
  case Intrinsic::x86_sse41_packusdw:
  case Intrinsic::x86_avx2_packssdw:
  case Intrinsic::x86_avx2_packsswb:
  case Intrinsic::x86_avx2_packusdw:
  case Intrinsic::x86_avx2_packuswb:
  case Intrinsic::x86_avx512_packssdw_512:
  case Intrinsic::x86_avx512_packsswb_512:
  case Intrinsic::x86_avx512_packusdw_512:
  case Intrinsic::x86_avx512_packuswb_512: {
    auto *Ty0 = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(Ty0)->getNumElements();
    assert(VWidth == (InnerVWidth * 2) && "Unexpected input size");

    unsigned NumLanes = Ty0->getPrimitiveSizeInBits() / 128;
    unsigned VWidthPerLane = VWidth / NumLanes;
    unsigned InnerVWidthPerLane = InnerVWidth / NumLanes;

    // Per 128-bit lane, the result packs the elements of the first input and
    // then the second, e.g.
    //   v8i16 PACK(v4i32 X, v4i32 Y)   -> (X[0..3],Y[0..3])
    //   v32i8 PACK(v16i16 X, v16i16 Y) -> (X[0..7],Y[0..7]),(X[8..15],Y[8..15])
    for (int OpNum = 0; OpNum != 2; ++OpNum) {
      APInt OpDemandedElts(InnerVWidth, 0);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        unsigned LaneIdx = Lane * VWidthPerLane;
        for (unsigned Elt = 0; Elt != InnerVWidthPerLane; ++Elt) {
          unsigned Idx = LaneIdx + Elt + InnerVWidthPerLane * OpNum;
          if (DemandedElts[Idx])
            OpDemandedElts.setBit((Lane * InnerVWidthPerLane) + Elt);
        }
      }

      // Demand elements from the operand.
      APInt OpUndefElts(InnerVWidth, 0);
      simplifyAndSetOp(&II, OpNum, OpDemandedElts, OpUndefElts);

      // Pack the operand's UNDEF elements, one lane at a time.
      OpUndefElts = OpUndefElts.zext(VWidth);
      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
        APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);
        LaneElts = LaneElts.getLoBits(InnerVWidthPerLane);
        LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);
        UndefElts |= LaneElts;
      }
    }
    break;
  }
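  // Worked example: for v8i16 PACKSSDW(v4i32 X, v4i32 Y) with only result
  // elements 0..3 demanded, OpNum 0 demands X[0..3] while OpNum 1 demands no
  // elements of Y, so Y can be simplified to poison and its result lanes are
  // reported undef.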
  // Horizontal multiply-accumulate: each result element consumes a pair of
  // adjacent elements from each operand, so scale the demanded mask up.
  case Intrinsic::x86_sse2_pmadd_wd:
  case Intrinsic::x86_avx2_pmadd_wd:
  case Intrinsic::x86_avx512_pmaddw_d_512:
  case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
  case Intrinsic::x86_avx2_pmadd_ub_sw:
  case Intrinsic::x86_avx512_pmaddubs_w_512: {
    // PMADD - demand both src elements that map to each dst element.
    auto *ArgTy = II.getArgOperand(0)->getType();
    unsigned InnerVWidth = cast<FixedVectorType>(ArgTy)->getNumElements();
    assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
    APInt OpDemandedElts = APIntOps::ScaleBitMask(DemandedElts, InnerVWidth);
    APInt Op0UndefElts(InnerVWidth, 0);
    APInt Op1UndefElts(InnerVWidth, 0);
    simplifyAndSetOp(&II, 0, OpDemandedElts, Op0UndefElts);
    simplifyAndSetOp(&II, 1, OpDemandedElts, Op1UndefElts);
    // NOTE: madd(undef, undef) != undef.
    break;
  }
  // Shuffle-select intrinsics: the control/index operand (operand 1) maps
  // one-to-one onto the result lanes, so it inherits the demanded mask; the
  // data operand stays fully demanded since any lane may be selected.
  // PSHUFB
  case Intrinsic::x86_ssse3_pshuf_b_128:
  case Intrinsic::x86_avx2_pshuf_b:
  case Intrinsic::x86_avx512_pshuf_b_512:
  // PERMILVAR
  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx512_vpermilvar_ps_512:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256:
  case Intrinsic::x86_avx512_vpermilvar_pd_512:
  // PERMV
  case Intrinsic::x86_avx2_permd:
  case Intrinsic::x86_avx2_permps: {
    simplifyAndSetOp(&II, 1, DemandedElts, UndefElts);
    break;
  }
  // SSE4A instructions leave the upper 64 bits of the 128-bit result in an
  // undefined state.
  case Intrinsic::x86_sse4a_extrq:
  case Intrinsic::x86_sse4a_extrqi:
  case Intrinsic::x86_sse4a_insertq:
  case Intrinsic::x86_sse4a_insertqi:
    UndefElts.setHighBits(VWidth / 2);
    break;
  }

  return std::nullopt;
}