25 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
28 #ifndef __AVX512DQINTRIN_H
29 #define __AVX512DQINTRIN_H
32 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
36 return (__m512i) ((__v8du) __A * (__v8du) __B);
41 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
48 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
55 return (__m512d)((__v8du)__A ^ (__v8du)__B);
60 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
67 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
74 return (__m512)((__v16su)__A ^ (__v16su)__B);
79 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
86 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
93 return (__m512d)((__v8du)__A | (__v8du)__B);
98 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
105 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
112 return (__m512)((__v16su)__A | (__v16su)__B);
117 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
124 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
131 return (__m512d)((__v8du)__A & (__v8du)__B);
136 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
143 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
150 return (__m512)((__v16su)__A & (__v16su)__B);
155 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
162 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
169 return (__m512d)(~(__v8du)__A & (__v8du)__B);
174 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
181 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
188 return (__m512)(~(__v16su)__A & (__v16su)__B);
193 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
200 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
207 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
215 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
223 return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
/* Convert 8 packed double-precision FP elements to 8 signed 64-bit integers
   (VCVTPD2QQ) with explicit rounding control R; -1 mask = no masking. */
#define _mm512_cvt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })

/* Convert packed double-precision FP to unsigned 64-bit integers
   (VCVTPD2UQQ) with rounding control. */
#define _mm512_cvt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

/* Convert 8 packed single-precision FP elements (low __m256) to signed
   64-bit integers (VCVTPS2QQ) with rounding control. */
#define _mm512_cvt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R)); })

/* Convert packed single-precision FP to unsigned 64-bit integers
   (VCVTPS2UQQ) with rounding control. */
#define _mm512_cvt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })
364 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
372 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
380 return (__m512d) __builtin_ia32_cvtqq2pd512_mask ((__v8di) __A,
/* Convert 8 signed 64-bit integers to packed double-precision FP
   (VCVTQQ2PD) with explicit rounding control. */
#define _mm512_cvt_roundepi64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

/* Convert 8 signed 64-bit integers to packed single-precision FP
   (VCVTQQ2PS, result in a __m256) with rounding control. */
#define _mm512_cvt_roundepi64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R)); })
443 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
451 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
459 return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
/* Truncating conversions (round toward zero is inherent to the VCVTT*
   instructions; R only selects the SAE/exception behavior). */
#define _mm512_cvtt_roundpd_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

/* Truncating packed double -> unsigned 64-bit integer (VCVTTPD2UQQ). */
#define _mm512_cvtt_roundpd_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })

/* Truncating packed single -> signed 64-bit integer (VCVTTPS2QQ). */
#define _mm512_cvtt_roundps_epi64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)); })

/* Truncating packed single -> unsigned 64-bit integer (VCVTTPS2UQQ). */
#define _mm512_cvtt_roundps_epu64(A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) __extension__ ({ \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)); })
599 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
607 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
615 return (__m512d) __builtin_ia32_cvtuqq2pd512_mask ((__v8di) __A,
/* Convert 8 unsigned 64-bit integers to packed double-precision FP
   (VCVTUQQ2PD) with explicit rounding control. */
#define _mm512_cvt_roundepu64_pd(A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) __extension__ ({ \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)); })

/* Convert 8 unsigned 64-bit integers to packed single-precision FP
   (VCVTUQQ2PS, result in a __m256) with rounding control. */
#define _mm512_cvt_roundepu64_ps(A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)); })

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) __extension__ ({ \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)); })
/* VRANGEPD/VRANGEPS: per-element min/max/abs selection controlled by the
   immediate C. Non-"round" forms use the current rounding direction. */
#define _mm512_range_pd(A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_range_round_pd(A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)); })

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R)); })

#define _mm512_maskz_range_round_pd(U, A, B, C, R) __extension__ ({ \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R)); })

#define _mm512_range_ps(A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION); })

#define _mm512_range_round_ps(A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)); })

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R)); })

#define _mm512_maskz_range_round_ps(U, A, B, C, R) __extension__ ({ \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R)); })
/* Scalar VRANGESS/VRANGESD: operate on the low element, pass upper
   elements of A through. Builtin argument order is (A, B, W, U, C, R). */
#define _mm_range_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8) -1, (int)(C),\
                                               (int)(R)); })

#define _mm_range_ss(A ,B , C) _mm_range_round_ss(A, B, C ,_MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W),\
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R)); })

#define _mm_mask_range_ss(W , U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C , _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C),\
                                               (int)(R)); })

#define _mm_maskz_range_ss(U, A ,B , C) _mm_maskz_range_round_ss(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)

#define _mm_range_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8) -1, (int)(C),\
                                                (int)(R)); })

#define _mm_range_sd(A ,B , C) _mm_range_round_sd(A, B, C ,_MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W),\
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R)); })

#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C ,_MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C),\
                                                (int)(R)); })

#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C ,_MM_FROUND_CUR_DIRECTION)
/* VREDUCEPD/VREDUCEPS: extract the reduced argument (x - round(x) at the
   precision selected by imm B). Non-"round" forms use current direction. */
#define _mm512_reduce_pd(A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION); })

#define _mm512_reduce_ps(A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION); })

#define _mm512_reduce_round_pd(A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)); })

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_maskz_reduce_round_pd(U, A, B, R) __extension__ ({\
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)); })

#define _mm512_reduce_round_ps(A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)); })

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R)); })

#define _mm512_maskz_reduce_round_ps(U, A, B, R) __extension__ ({\
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)); })
/* Scalar VREDUCESS/VREDUCESD: reduce the low element by imm C, pass the
   upper elements of A through. */
#define _mm_reduce_ss(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_ss(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_ss(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION); })

#define _mm_reduce_round_ss(A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) __extension__ ({ \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R)); })

#define _mm_reduce_sd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })

#define _mm_mask_reduce_sd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION); })

#define _mm_maskz_reduce_sd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION); })

#define _mm_reduce_round_sd(A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R)); })

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)); })

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) __extension__ ({ \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R)); })
951 return (
__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
957 return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
963 return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
969 return (
__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
976 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
984 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
992 return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
1000 return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
1001 0, 1, 2, 3, 4, 5, 6, 7,
1002 0, 1, 2, 3, 4, 5, 6, 7);
1008 return (__m512)__builtin_ia32_selectps_512((
__mmask8)__M,
1016 return (__m512)__builtin_ia32_selectps_512((
__mmask8)__M,
1024 return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1025 0, 1, 0, 1, 0, 1, 0, 1);
1031 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
1039 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
1047 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1055 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1063 return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A,
1071 return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
1072 0, 1, 2, 3, 4, 5, 6, 7,
1073 0, 1, 2, 3, 4, 5, 6, 7);
1079 return (__m512i)__builtin_ia32_selectd_512((
__mmask8)__M,
1087 return (__m512i)__builtin_ia32_selectd_512((
__mmask8)__M,
1095 return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1096 0, 1, 0, 1, 0, 1, 0, 1);
1102 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1110 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
/* 256-bit / 128-bit lane extraction implemented as shufflevector; the mask
   variants blend the extraction result with W (or zero) via a 256/128-bit
   select. */
#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_undefined_ps(), \
                                  ((imm) & 1) ?  8 : 0, \
                                  ((imm) & 1) ?  9 : 1, \
                                  ((imm) & 1) ? 10 : 2, \
                                  ((imm) & 1) ? 11 : 3, \
                                  ((imm) & 1) ? 12 : 4, \
                                  ((imm) & 1) ? 13 : 5, \
                                  ((imm) & 1) ? 14 : 6, \
                                  ((imm) & 1) ? 15 : 7); })

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
                                      (__v8sf)(__m256)(W)); })

#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
                                      (__v8sf)_mm256_setzero_ps()); })

#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_undefined_pd(), \
                                   0 + ((imm) & 0x3) * 2, \
                                   1 + ((imm) & 0x3) * 2); })

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
                                       (__v2df)(__m128d)(W)); })

#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
                                       (__v2df)_mm_setzero_pd()); })

#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_undefined_epi32(), \
                                   ((imm) & 1) ?  8 : 0, \
                                   ((imm) & 1) ?  9 : 1, \
                                   ((imm) & 1) ? 10 : 2, \
                                   ((imm) & 1) ? 11 : 3, \
                                   ((imm) & 1) ? 12 : 4, \
                                   ((imm) & 1) ? 13 : 5, \
                                   ((imm) & 1) ? 14 : 6, \
                                   ((imm) & 1) ? 15 : 7); })

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
                                      (__v8si)(__m256i)(W)); })

#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
                                      (__v8si)_mm256_setzero_si256()); })

#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)_mm512_undefined_epi32(), \
                                   0 + ((imm) & 0x3) * 2, \
                                   1 + ((imm) & 0x3) * 2); })

/* Result is an integer vector; cast to __m128i (the original (__m128d)
   cast mislabeled the result type of an epi64 extract). */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
                                      (__v2di)(__m128i)(W)); })

#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
                                      (__v2di)_mm_setzero_di()); })
/* 256-bit / 128-bit lane insertion via shufflevector: B is widened to 512
   bits (its elements become indices 8/9 or 16..23 of the combined vector)
   and the immediate selects which lane of A it replaces. */
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
  (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
                                  (__v16sf)_mm512_castps256_ps512((__m256)(B)),\
                                  ((imm) & 0x1) ?  0 : 16, \
                                  ((imm) & 0x1) ?  1 : 17, \
                                  ((imm) & 0x1) ?  2 : 18, \
                                  ((imm) & 0x1) ?  3 : 19, \
                                  ((imm) & 0x1) ?  4 : 20, \
                                  ((imm) & 0x1) ?  5 : 21, \
                                  ((imm) & 0x1) ?  6 : 22, \
                                  ((imm) & 0x1) ?  7 : 23, \
                                  ((imm) & 0x1) ? 16 :  8, \
                                  ((imm) & 0x1) ? 17 :  9, \
                                  ((imm) & 0x1) ? 18 : 10, \
                                  ((imm) & 0x1) ? 19 : 11, \
                                  ((imm) & 0x1) ? 20 : 12, \
                                  ((imm) & 0x1) ? 21 : 13, \
                                  ((imm) & 0x1) ? 22 : 14, \
                                  ((imm) & 0x1) ? 23 : 15); })

#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W)); })

#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps()); })

#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
  (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
                                   (__v8df)_mm512_castpd128_pd512((__m128d)(B)),\
                                   (((imm) & 0x3) == 0) ? 8 : 0, \
                                   (((imm) & 0x3) == 0) ? 9 : 1, \
                                   (((imm) & 0x3) == 1) ? 8 : 2, \
                                   (((imm) & 0x3) == 1) ? 9 : 3, \
                                   (((imm) & 0x3) == 2) ? 8 : 4, \
                                   (((imm) & 0x3) == 2) ? 9 : 5, \
                                   (((imm) & 0x3) == 3) ? 8 : 6, \
                                   (((imm) & 0x3) == 3) ? 9 : 7); })

#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W)); })

#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd()); })

#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
                                   (__v16si)_mm512_castsi256_si512((__m256i)(B)),\
                                   ((imm) & 0x1) ?  0 : 16, \
                                   ((imm) & 0x1) ?  1 : 17, \
                                   ((imm) & 0x1) ?  2 : 18, \
                                   ((imm) & 0x1) ?  3 : 19, \
                                   ((imm) & 0x1) ?  4 : 20, \
                                   ((imm) & 0x1) ?  5 : 21, \
                                   ((imm) & 0x1) ?  6 : 22, \
                                   ((imm) & 0x1) ?  7 : 23, \
                                   ((imm) & 0x1) ? 16 :  8, \
                                   ((imm) & 0x1) ? 17 :  9, \
                                   ((imm) & 0x1) ? 18 : 10, \
                                   ((imm) & 0x1) ? 19 : 11, \
                                   ((imm) & 0x1) ? 20 : 12, \
                                   ((imm) & 0x1) ? 21 : 13, \
                                   ((imm) & 0x1) ? 22 : 14, \
                                   ((imm) & 0x1) ? 23 : 15); })

#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W)); })

#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512()); })

#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
  (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
                                   (__v8di)_mm512_castsi128_si512((__m128i)(B)),\
                                   (((imm) & 0x3) == 0) ? 8 : 0, \
                                   (((imm) & 0x3) == 0) ? 9 : 1, \
                                   (((imm) & 0x3) == 1) ? 8 : 2, \
                                   (((imm) & 0x3) == 1) ? 9 : 3, \
                                   (((imm) & 0x3) == 2) ? 8 : 4, \
                                   (((imm) & 0x3) == 2) ? 9 : 5, \
                                   (((imm) & 0x3) == 3) ? 8 : 6, \
                                   (((imm) & 0x3) == 3) ? 9 : 7); })

#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W)); })

#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512()); })
/* VFPCLASSPS/PD/SS/SD: test each element against the FP categories
   selected by imm and return the result as a mask register. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)(U)); })

#define _mm512_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)-1); })

#define _mm512_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm512_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm_fpclass_sd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)-1); })

#define _mm_mask_fpclass_sd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)(U)); })

#define _mm_fpclass_ss_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)-1); })

#define _mm_mask_fpclass_ss_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)(U)); })
1327 #undef __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x2(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_epu64(__mmask8 __U, __m256 __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_xor_pd(__m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttpd_epi64(__mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS _mm512_setzero_pd(void)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtepi64_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_epi64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttpd_epi64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu64(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_andnot_pd(__m512d __A, __m512d __B)
static __inline __m512 __DEFAULT_FN_ATTRS _mm512_setzero_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttpd_epu64(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_or_pd(__m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttps_epi64(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_undefined_ps(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_movm_epi32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_andnot_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm512_movepi64_mask(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_epu64(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_xor_ps(__m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_pd(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttps_epu64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtpd_epu64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttps_epu64(__mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepu64_pd(__m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_cvtepu64_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvttpd_epu64(__mmask8 __U, __m512d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i32x8(__m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtpd_epi64(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i64x2(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtpd_epu64(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i32x2(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_broadcast_f64x2(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epu64(__m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_cvtepu64_pd(__mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_cvtepi64_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epi64(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_cvtps_epi64(__mmask8 __U, __m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x8(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtpd_epi64(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtps_epu64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_and_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_movm_epi64(__mmask8 __A)
static __inline __m512i __DEFAULT_FN_ATTRS _mm512_setzero_si512(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvtps_epi64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepu64_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_and_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttpd_epu64(__m512i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_or_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttpd_epi64(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_cvttps_epi64(__m512i __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_movepi32_mask(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvtps_epi64(__m256 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)