#ifndef __IMMINTRIN_H
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLDQINTRIN_H
#define __AVX512VLDQINTRIN_H

/* Default attributes for every function in this file: always inline, and
   require the AVX512VL and AVX512DQ target features. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi64(__m256i __A, __m256i __B)
{ return (__m256i)((__v4du)__A * (__v4du)__B); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
{ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
{ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi64(__m128i __A, __m128i __B)
{ return (__m128i)((__v2du)__A * (__v2du)__B); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
{ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); }
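A minimal usage sketch (not part of the header; assumes an AVX512VL+AVX512DQ
target and compilation with -mavx512vl -mavx512dq). The maskz form zeroes
every lane whose mask bit is clear:

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256i a = _mm256_set_epi64x(4, 3, 2, 1);       /* lanes, low to high: 1 2 3 4 */
  __m256i b = _mm256_set_epi64x(40, 30, 20, 10);
  __m256i r = _mm256_maskz_mullo_epi64(0x5, a, b); /* keep lanes 0 and 2 only */
  long long out[4];
  _mm256_storeu_si256((__m256i *)out, r);
  printf("%lld %lld %lld %lld\n", out[0], out[1], out[2], out[3]); /* 10 0 90 0 */
  return 0;
}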
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)__W); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)__W); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_xor_pd(__mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)__W); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)__W); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)__W); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B)
{ return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)_mm_setzero_pd()); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)__W); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B)
{ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)__W); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B)
{ return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }
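Because the masked logic ops are just the plain AVX operations wrapped in a
per-lane select, common idioms compose naturally. A sketch (illustrative, not
part of the header): flip the sign of only the selected lanes by XOR-ing with
-0.0.

#include <immintrin.h>

static inline __m256d negate_selected(__m256d __v, __mmask8 __m) {
  /* Lanes with a set mask bit take __v ^ -0.0 (sign flipped); the rest
     pass through unchanged via the write-through operand. */
  return _mm256_mask_xor_pd(__v, __m, __v, _mm256_set1_pd(-0.0));
}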
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi64(__m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2qq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtpd_epi64(__m128i __W, __mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2qq128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_epi64(__mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2qq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi64(__m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2qq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_epi64(__m256i __W, __mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2qq256_mask((__v4df)__A, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_epi64(__mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2qq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epu64(__m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2uqq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtpd_epu64(__m128i __W, __mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2uqq128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_epu64(__mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvtpd2uqq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtpd_epu64(__m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2uqq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_epu64(__m256i __W, __mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2uqq256_mask((__v4df)__A, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_epu64(__mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvtpd2uqq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi64(__m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2qq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_epi64(__m128i __W, __mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2qq128_mask((__v4sf)__A, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_epi64(__mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2qq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi64(__m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2qq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_epi64(__m256i __W, __mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2qq256_mask((__v4sf)__A, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_epi64(__mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2qq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epu64(__m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2uqq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_epu64(__m128i __W, __mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2uqq128_mask((__v4sf)__A, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_epu64(__mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvtps2uqq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epu64(__m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2uqq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_epu64(__m256i __W, __mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2uqq256_mask((__v4sf)__A, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_epu64(__mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvtps2uqq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi64_pd(__m128i __A)
{ return (__m128d)__builtin_ia32_cvtqq2pd128_mask((__v2di)__A, (__v2df)_mm_setzero_pd(), (__mmask8)-1); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A)
{ return (__m128d)__builtin_ia32_cvtqq2pd128_mask((__v2di)__A, (__v2df)__W, (__mmask8)__U); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A)
{ return (__m128d)__builtin_ia32_cvtqq2pd128_mask((__v2di)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi64_pd(__m256i __A)
{ return (__m256d)__builtin_ia32_cvtqq2pd256_mask((__v4di)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)-1); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A)
{ return (__m256d)__builtin_ia32_cvtqq2pd256_mask((__v4di)__A, (__v4df)__W, (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A)
{ return (__m256d)__builtin_ia32_cvtqq2pd256_mask((__v4di)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi64_ps(__m128i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps128_mask((__v2di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)-1); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m128i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps128_mask((__v2di)__A, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_ps(__mmask8 __U, __m128i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps128_mask((__v2di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_cvtepi64_ps(__m256i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps256_mask((__v4di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)-1); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps256_mask((__v4di)__A, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A)
{ return (__m128)__builtin_ia32_cvtqq2ps256_mask((__v4di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi64(__m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2qq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttpd_epi64(__m128i __W, __mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2qq128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttpd_epi64(__mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2qq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi64(__m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2qq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttpd_epi64(__m256i __W, __mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2qq256_mask((__v4df)__A, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttpd_epi64(__mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2qq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epu64(__m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2uqq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttpd_epu64(__m128i __W, __mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2uqq128_mask((__v2df)__A, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttpd_epu64(__mmask8 __U, __m128d __A)
{ return (__m128i)__builtin_ia32_cvttpd2uqq128_mask((__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttpd_epu64(__m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2uqq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttpd_epu64(__m256i __W, __mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2uqq256_mask((__v4df)__A, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttpd_epu64(__mmask8 __U, __m256d __A)
{ return (__m256i)__builtin_ia32_cvttpd2uqq256_mask((__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi64(__m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2qq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttps_epi64(__m128i __W, __mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2qq128_mask((__v4sf)__A, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttps_epi64(__mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2qq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi64(__m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2qq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttps_epi64(__m256i __W, __mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2qq256_mask((__v4sf)__A, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttps_epi64(__mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2qq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epu64(__m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2uqq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttps_epu64(__m128i __W, __mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2uqq128_mask((__v4sf)__A, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttps_epu64(__mmask8 __U, __m128 __A)
{ return (__m128i)__builtin_ia32_cvttps2uqq128_mask((__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epu64(__m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2uqq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttps_epu64(__m256i __W, __mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2uqq256_mask((__v4sf)__A, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttps_epu64(__mmask8 __U, __m128 __A)
{ return (__m256i)__builtin_ia32_cvttps2uqq256_mask((__v4sf)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepu64_pd(__m128i __A)
{ return (__m128d)__builtin_ia32_cvtuqq2pd128_mask((__v2di)__A, (__v2df)_mm_setzero_pd(), (__mmask8)-1); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A)
{ return (__m128d)__builtin_ia32_cvtuqq2pd128_mask((__v2di)__A, (__v2df)__W, (__mmask8)__U); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A)
{ return (__m128d)__builtin_ia32_cvtuqq2pd128_mask((__v2di)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_cvtepu64_pd(__m256i __A)
{ return (__m256d)__builtin_ia32_cvtuqq2pd256_mask((__v4di)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)-1); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A)
{ return (__m256d)__builtin_ia32_cvtuqq2pd256_mask((__v4di)__A, (__v4df)__W, (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A)
{ return (__m256d)__builtin_ia32_cvtuqq2pd256_mask((__v4di)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepu64_ps(__m128i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps128_mask((__v2di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)-1); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m128i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps128_mask((__v2di)__A, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepu64_ps(__mmask8 __U, __m128i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps128_mask((__v2di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_cvtepu64_ps(__m256i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps256_mask((__v4di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)-1); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps256_mask((__v4di)__A, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A)
{ return (__m128)__builtin_ia32_cvtuqq2ps256_mask((__v4di)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }
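The cvt forms round according to the current rounding mode (round-to-nearest-
even by default), while the cvtt forms truncate toward zero. A sketch
(illustrative, not part of the header):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  __m256d v  = _mm256_set_pd(4.7, -3.2, 2.5, 1.1); /* lanes: 1.1 2.5 -3.2 4.7 */
  __m256i q  = _mm256_cvtpd_epi64(v);  /* nearest even: 1 2 -3 5 */
  __m256i qt = _mm256_cvttpd_epi64(v); /* truncated:    1 2 -3 4 */
  long long a[4], b[4];
  _mm256_storeu_si256((__m256i *)a, q);
  _mm256_storeu_si256((__m256i *)b, qt);
  printf("%lld %lld %lld %lld\n", a[0], a[1], a[2], a[3]);
  printf("%lld %lld %lld %lld\n", b[0], b[1], b[2], b[3]);
  return 0;
}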
#define _mm_range_pd(A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)-1); })

#define _mm_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)(__m128d)(W), \
                                          (__mmask8)(U)); })

#define _mm_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)(U)); })

#define _mm256_range_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)_mm256_setzero_pd(), \
                                          (__mmask8)-1); })

#define _mm256_mask_range_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)(__m256d)(W), \
                                          (__mmask8)(U)); })

#define _mm256_maskz_range_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)_mm256_setzero_pd(), \
                                          (__mmask8)(U)); })

#define _mm_range_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8)-1); })

#define _mm_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)(__m128)(W), (__mmask8)(U)); })

#define _mm_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8)(U)); })

#define _mm256_range_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)_mm256_setzero_ps(), \
                                         (__mmask8)-1); })

#define _mm256_mask_range_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)(__m256)(W), (__mmask8)(U)); })

#define _mm256_maskz_range_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)_mm256_setzero_ps(), \
                                         (__mmask8)(U)); })
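A usage sketch. The immediate encoding below is an assumption taken from
Intel's VRANGEPD documentation (bits 1:0: 00 min, 01 max, 10 absolute min,
11 absolute max; bits 3:2 select the sign control); verify against the ISA
reference before relying on it.

#include <immintrin.h>

static inline __m256d range_max(__m256d __a, __m256d __b) {
  return _mm256_range_pd(__a, __b, 0x1); /* per-lane "max", sign from __a */
}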
#define _mm_reduce_pd(A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1); })

#define _mm_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)); })

#define _mm_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U)); })

#define _mm256_reduce_pd(A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1); })

#define _mm256_mask_reduce_pd(W, U, A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U)); })

#define _mm256_maskz_reduce_pd(U, A, B) __extension__ ({ \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U)); })

#define _mm_reduce_ps(A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1); })

#define _mm_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)(__m128)(W), \
                                          (__mmask8)(U)); })

#define _mm_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U)); })

#define _mm256_reduce_ps(A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1); })

#define _mm256_mask_reduce_ps(W, U, A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)(__m256)(W), \
                                          (__mmask8)(U)); })

#define _mm256_maskz_reduce_ps(U, A, B) __extension__ ({ \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U)); })
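VREDUCEPD/VREDUCEPS subtract a rounded copy of the argument and return the
remaining low-order fraction. As an assumption from the ISA documentation,
imm bits 7:4 give the number of fraction bits kept by that rounding and bits
1:0 pick the rounding mode, so 0x01 (zero fraction bits, round down) yields
the non-negative fractional part:

#include <immintrin.h>

static inline __m128 fractional_part(__m128 __x) {
  return _mm_reduce_ps(__x, 0x01); /* x - floor(x), per lane */
}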
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_movepi32_mask(__m128i __A)
{ return (__mmask8)__builtin_ia32_cvtd2mask128((__v4si)__A); }

static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_movepi32_mask(__m256i __A)
{ return (__mmask8)__builtin_ia32_cvtd2mask256((__v8si)__A); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movm_epi32(__mmask8 __A)
{ return (__m128i)__builtin_ia32_cvtmask2d128(__A); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_movm_epi32(__mmask8 __A)
{ return (__m256i)__builtin_ia32_cvtmask2d256(__A); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movm_epi64(__mmask8 __A)
{ return (__m128i)__builtin_ia32_cvtmask2q128(__A); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_movm_epi64(__mmask8 __A)
{ return (__m256i)__builtin_ia32_cvtmask2q256(__A); }

static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_movepi64_mask(__m128i __A)
{ return (__mmask8)__builtin_ia32_cvtq2mask128((__v2di)__A); }

static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_movepi64_mask(__m256i __A)
{ return (__mmask8)__builtin_ia32_cvtq2mask256((__v4di)__A); }
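These movm/movepi pairs convert between mask registers and vectors whose lanes
are all-ones or all-zeros; movepi reads back each lane's sign bit, so a round
trip is the identity on the low lane bits. An illustrative sketch:

#include <immintrin.h>

static inline __mmask8 mask_roundtrip(__mmask8 __m) {
  return _mm256_movepi64_mask(_mm256_movm_epi64(__m)); /* == __m & 0xF */
}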
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_f32x2(__m128 __A)
{ return (__m256)__builtin_ia32_broadcastf32x2_256_mask((__v4sf)__A, (__v8sf)_mm256_undefined_ps(), (__mmask8)-1); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A)
{ return (__m256)__builtin_ia32_broadcastf32x2_256_mask((__v4sf)__A, (__v8sf)__O, __M); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A)
{ return (__m256)__builtin_ia32_broadcastf32x2_256_mask((__v4sf)__A, (__v8sf)_mm256_setzero_ps(), __M); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_broadcast_f64x2(__m128d __A)
{ return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)__O); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{ return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcast_i32x2(__m128i __A)
{ return (__m128i)__builtin_ia32_broadcasti32x2_128_mask((__v4si)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A)
{ return (__m128i)__builtin_ia32_broadcasti32x2_128_mask((__v4si)__A, (__v4si)__O, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A)
{ return (__m128i)__builtin_ia32_broadcasti32x2_128_mask((__v4si)__A, (__v4si)_mm_setzero_si128(), __M); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcast_i32x2(__m128i __A)
{ return (__m256i)__builtin_ia32_broadcasti32x2_256_mask((__v4si)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A)
{ return (__m256i)__builtin_ia32_broadcasti32x2_256_mask((__v4si)__A, (__v8si)__O, __M); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A)
{ return (__m256i)__builtin_ia32_broadcasti32x2_256_mask((__v4si)__A, (__v8si)_mm256_setzero_si256(), __M); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcast_i64x2(__m128i __A)
{ return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
{ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)__O); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)_mm256_setzero_si256()); }
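The x2 broadcasts repeat a 128-bit pair of elements across the destination.
A sketch (illustrative, not part of the header):

#include <immintrin.h>

static inline __m256d tile_pair(double __lo, double __hi) {
  __m128d pair = _mm_set_pd(__hi, __lo);  /* lanes: lo, hi */
  return _mm256_broadcast_f64x2(pair);    /* lanes: lo, hi, lo, hi */
}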
#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                   (__v4df)_mm256_undefined_pd(), \
                                   ((imm) & 1) ? 2 : 0, \
                                   ((imm) & 1) ? 3 : 1); })

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
                                       (__v2df)(__m128d)(W)); })

#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
                                       (__v2df)_mm_setzero_pd()); })

#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \
                                   (__v4di)_mm256_undefined_si256(), \
                                   ((imm) & 1) ? 2 : 0, \
                                   ((imm) & 1) ? 3 : 1); })

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
                                      (__v2di)(__m128i)(W)); })

#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
                                      (__v2di)_mm_setzero_di()); })

#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(A), \
                                   (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \
                                   ((imm) & 0x1) ? 0 : 4, \
                                   ((imm) & 0x1) ? 1 : 5, \
                                   ((imm) & 0x1) ? 4 : 2, \
                                   ((imm) & 0x1) ? 5 : 3); })

#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                       (__v4df)(__m256d)(W)); })

#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                       (__v4df)_mm256_setzero_pd()); })

#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(A), \
                                   (__v4di)_mm256_castsi128_si256((__m128i)(B)), \
                                   ((imm) & 0x1) ? 0 : 4, \
                                   ((imm) & 0x1) ? 1 : 5, \
                                   ((imm) & 0x1) ? 4 : 2, \
                                   ((imm) & 0x1) ? 5 : 3); })

#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                      (__v4di)(__m256i)(W)); })

#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                      (__v4di)_mm256_setzero_si256()); })
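The extract/insert pair moves whole 128-bit halves; the immediate selects the
half and must be a compile-time constant. Swapping the halves of a vector, as
an illustrative sketch:

#include <immintrin.h>

static inline __m256i swap_halves(__m256i __v) {
  __m128i lo = _mm256_extracti64x2_epi64(__v, 0);
  __m128i hi = _mm256_extracti64x2_epi64(__v, 1);
  return _mm256_inserti64x2(_mm256_inserti64x2(__v, lo, 1), hi, 0);
}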
#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm256_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_fpclass_pd_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm256_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_fpclass_ps_mask(A, imm) __extension__ ({ \
  (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__mmask8)-1); })
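fpclass tests each lane against a set of categories selected by the immediate.
The category bits below follow the VFPCLASSPD encoding as an assumption
(0x01 QNaN, 0x08 +Inf, 0x10 -Inf, 0x80 SNaN); check the ISA reference.

#include <immintrin.h>

static inline __mmask8 non_finite_lanes(__m128d __v) {
  return _mm_fpclass_pd_mask(__v, 0x01 | 0x08 | 0x10 | 0x80);
}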
#undef __DEFAULT_FN_ATTRS

#endif /* __AVX512VLDQINTRIN_H */