#ifndef __IMMINTRIN_H
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FINTRIN_H
#define __AVX512FINTRIN_H

typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
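/* Illustrative sketch, not part of the original header: the __aligned__(1)
 * types above let an unaligned 64-byte load be written as a plain struct
 * member read instead of a builtin; the __example_* names are hypothetical. */
static __inline__ __m512
__example_loadu_ps(void const *__p)
{
  struct __example_loadu {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __example_loadu *)__p)->__v; /* no alignment assumed */
}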
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

#define _MM_CMPINT_GE _MM_CMPINT_NLT
#define _MM_CMPINT_GT _MM_CMPINT_NLE

/* Default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_setzero_si512(void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
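/* Illustrative note, not part of the original header: the _MM_FROUND_* values
 * select static rounding for the *_round_* intrinsics defined below, e.g.
 *   __m512d r = _mm512_add_round_pd(a, b,
 *                   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 * where _MM_FROUND_NO_EXC (0x08, from <smmintrin.h>) suppresses
 * floating-point exceptions. */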
#define _mm512_setzero_epi32 _mm512_setzero_si512

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 0, 0, 0, 0, 0, 0, 0,
                                          0, 0, 0, 0, 0, 0, 0, 0);

  return (__m512i)__builtin_ia32_selectd_512(__M,

  return (__m512i)__builtin_ia32_selectd_512(__M,

  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 0, 0, 0, 0, 0, 0, 0);

  return (__m512i)__builtin_ia32_selectq_512(__M,

  return (__m512i)__builtin_ia32_selectq_512(__M,
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_setzero_ps(void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

#define _mm512_setzero _mm512_setzero_ps

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_setzero_pd(void)
{
  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                                 __w, __w, __w, __w, __w, __w, __w, __w };

  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
  return __extension__ (__m512i)(__v64qi){
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w };
  return __extension__ (__m512i)(__v32hi){
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w };
  return __extension__ (__m512i)(__v16si){
    __s, __s, __s, __s, __s, __s, __s, __s,
    __s, __s, __s, __s, __s, __s, __s, __s };
  return (__m512i)__builtin_ia32_selectd_512(__M,

  return __extension__ (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };

  return (__m512i)__builtin_ia32_selectq_512(__M,
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 0, 0, 0, 0, 0, 0, 0,
                                         0, 0, 0, 0, 0, 0, 0, 0);
/* _mm512_set4_epi32 / _mm512_set4_epi64 / _mm512_set4_pd / _mm512_set4_ps */
  return __extension__ (__m512i)(__v16si)

  return __extension__ (__m512i)(__v8di)

  return __extension__ (__m512d)

  return __extension__ (__m512)
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps((e3),(e2),(e1),(e0))

  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 0, 0, 0, 0, 0, 0, 0);
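/* Illustrative note, not part of the original header: the setr4 macros above
 * only reverse the argument order of the set4 forms, so these two calls
 * produce the same repeating pattern {1,2,3,4,1,2,3,4,...}:
 *   __m512i a = _mm512_set4_epi32(4, 3, 2, 1);
 *   __m512i b = _mm512_setr4_epi32(1, 2, 3, 4);
 */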
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);

  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
                                 -1, -1, -1, -1, -1, -1, -1, -1);

  return __builtin_shufflevector(__a, __a, 0, 1);

  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);

  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);

  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
  return (__m512)__A;

  return (__m512i)__A;

  return __builtin_shufflevector(__A, __A, 0, 1, -1, -1, -1, -1, -1, -1);

  return (__m512d)__A;

  return (__m512i)__A;

  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, -1, -1, -1, -1,
                                 -1, -1, -1, -1, -1, -1, -1, -1);

  return __builtin_shufflevector(__A, __A, 0, 1, -1, -1, -1, -1, -1, -1);

  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, -1, -1, -1, -1);

  return (__m512)__A;

  return (__m512d)__A;

  return (__m128i)__builtin_shufflevector(__A, __A, 0, 1);

  return (__m256i)__builtin_shufflevector(__A, __A, 0, 1, 2, 3);

  return (__mmask16)__a;
  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(),
                                 0, 1, 2, 3, 4, 5, 6, 7);

  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(),
                                 0, 1, 2, 3, 4, 5, 6, 7,
                                 4, 5, 6, 7, 4, 5, 6, 7);

  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(),
                                 0, 1, 2, 3, 4, 5, 6, 7,
                                 8, 9, 10, 11, 12, 13, 14, 15);

  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(),
                                 0, 1, 2, 3, 4, 5, 6, 7);
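/* Illustrative note, not part of the original header: unlike the cast*
 * conversions above, which leave the widened upper elements undefined, the
 * zext* shuffles concatenate with a zero vector, so for example
 *   _mm512_zextpd256_pd512(v)   // upper four doubles are 0.0
 *   _mm512_castpd256_pd512(v)   // upper four doubles are undefined
 */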
/* Bitwise operators */
  return (__m512i)((__v16su)__a & (__v16su)__b);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,

  return (__m512i)((__v8du)__a & (__v8du)__b);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,

  return (__m512i)(~(__v8du)__A & (__v8du)__B);

  return (__m512i)(~(__v16su)__A & (__v16su)__B);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,

  return (__m512i)(~(__v8du)__A & (__v8du)__B);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

  return (__m512i)((__v16su)__a | (__v16su)__b);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,

  return (__m512i)((__v8du)__a | (__v8du)__b);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,

  return (__m512i)((__v16su)__a ^ (__v16su)__b);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,

  return (__m512i)((__v8du)__a ^ (__v8du)__b);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,

  return (__m512i)((__v8du)__a & (__v8du)__b);

  return (__m512i)((__v8du)__a | (__v8du)__b);

  return (__m512i)((__v8du)__a ^ (__v8du)__b);
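/* Illustrative note, not part of the original header: each masked form routes
 * the plain result through a select builtin that keeps result lanes where the
 * mask bit is set and source lanes elsewhere, e.g.
 *   __m512i r = _mm512_mask_and_epi32(src, (__mmask16)0x00FF, a, b);
 *   // lanes 0..7 = a & b, lanes 8..15 = src
 */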
/* Arithmetic */
  return (__m512d)((__v8df)__a + (__v8df)__b);

  return (__m512)((__v16sf)__a + (__v16sf)__b);

  return (__m512d)((__v8df)__a * (__v8df)__b);

  return (__m512)((__v16sf)__a * (__v16sf)__b);

  return (__m512d)((__v8df)__a - (__v8df)__b);

  return (__m512)((__v16sf)__a - (__v16sf)__b);

  return (__m512i)((__v8du)__A + (__v8du)__B);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

  return (__m512i)((__v8du)__A - (__v8du)__B);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

  return (__m512i)((__v16su)__A + (__v16su)__B);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,

  return (__m512i)((__v16su)__A - (__v16su)__B);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
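/* Illustrative note, not part of the original header: the unmasked arithmetic
 * intrinsics lower to generic vector operators, so _mm512_add_epi32(a, b) is
 * equivalent to (__m512i)((__v16su)a + (__v16su)b); the unsigned element
 * types make wrap-around on overflow well defined. */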
#define _mm512_max_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

  return (__m512d)__builtin_ia32_maxpd512((__v8df)__A, (__v8df)__B,

  return (__m512d)__builtin_ia32_selectpd_512(__U,

  return (__m512d)__builtin_ia32_selectpd_512(__U,
#define _mm512_max_round_ps(A, B, R) \
  (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

  return (__m512)__builtin_ia32_maxps512((__v16sf)__A, (__v16sf)__B,

  return (__m512)__builtin_ia32_selectps_512(__U,

  return (__m512)__builtin_ia32_selectps_512(__U,
  return (__m128)__builtin_ia32_maxss_round_mask((__v4sf)__A,

  return (__m128)__builtin_ia32_maxss_round_mask((__v4sf)__A,

#define _mm_max_round_ss(A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_max_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_max_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

  return (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)__A,

  return (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)__A,

#define _mm_max_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_max_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_max_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
}
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
#define _mm512_min_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

  return (__m512d)__builtin_ia32_minpd512((__v8df)__A, (__v8df)__B,

  return (__m512d)__builtin_ia32_selectpd_512(__U,

  return (__m512d)__builtin_ia32_selectpd_512(__U,
#define _mm512_min_round_ps(A, B, R) \
  (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

  return (__m512)__builtin_ia32_minps512((__v16sf)__A, (__v16sf)__B,

  return (__m512)__builtin_ia32_selectps_512(__U,

  return (__m512)__builtin_ia32_selectps_512(__U,
  return (__m128)__builtin_ia32_minss_round_mask((__v4sf)__A,

  return (__m128)__builtin_ia32_minss_round_mask((__v4sf)__A,

#define _mm_min_round_ss(A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_min_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_min_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

  return (__m128d)__builtin_ia32_minsd_round_mask((__v2df)__A,

  return (__m128d)__builtin_ia32_minsd_round_mask((__v2df)__A,

#define _mm_min_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_min_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_min_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
}
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si)__Y);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
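/* Illustrative note, not part of the original header: the pmuldq/pmuludq
 * builtins above implement a widening multiply of the even-numbered 32-bit
 * elements, e.g.
 *   __m512i p = _mm512_mul_epu32(a, b); // p[i] = (uint64)a[2i] * (uint64)b[2i]
 */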
  return (__m512i)((__v16su)__A * (__v16su)__B);

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

  return (__m512i)((__v8du)__A * (__v8du)__B);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
#define _mm512_sqrt_round_pd(A, R) \
  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))

#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd())

  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,

  return (__m512d)__builtin_ia32_selectpd_512(__U,

  return (__m512d)__builtin_ia32_selectpd_512(__U,
#define _mm512_sqrt_round_ps(A, R) \
  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))

#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,

  return (__m512)__builtin_ia32_selectps_512(__U,

  return (__m512)__builtin_ia32_selectps_512(__U,
  return (__m512d)__builtin_ia32_rsqrt14pd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_rsqrt14pd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_rsqrt14pd512_mask((__v8df)__A,

  return (__m512)__builtin_ia32_rsqrt14ps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_rsqrt14ps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_rsqrt14ps512_mask((__v16sf)__A,

  return (__m128)__builtin_ia32_rsqrt14ss_mask((__v4sf)__A,

  return (__m128)__builtin_ia32_rsqrt14ss_mask((__v4sf)__A,

  return (__m128)__builtin_ia32_rsqrt14ss_mask((__v4sf)__A,

  return (__m128d)__builtin_ia32_rsqrt14sd_mask((__v2df)__A,

  return (__m128d)__builtin_ia32_rsqrt14sd_mask((__v2df)__A,

  return (__m128d)__builtin_ia32_rsqrt14sd_mask((__v2df)__A,

  return (__m512d)__builtin_ia32_rcp14pd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_rcp14pd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_rcp14pd512_mask((__v8df)__A,

  return (__m512)__builtin_ia32_rcp14ps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_rcp14ps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_rcp14ps512_mask((__v16sf)__A,

  return (__m128)__builtin_ia32_rcp14ss_mask((__v4sf)__A,

  return (__m128)__builtin_ia32_rcp14ss_mask((__v4sf)__A,

  return (__m128)__builtin_ia32_rcp14ss_mask((__v4sf)__A,

  return (__m128d)__builtin_ia32_rcp14sd_mask((__v2df)__A,

  return (__m128d)__builtin_ia32_rcp14sd_mask((__v2df)__A,

  return (__m128d)__builtin_ia32_rcp14sd_mask((__v2df)__A,
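/* Illustrative note, not part of the original header: the rsqrt14/rcp14
 * builtins above return approximations of 1/sqrt(x) and 1/x with relative
 * error bounded by 2^-14; one Newton-Raphson step refines the reciprocal:
 *   __m512 y = _mm512_rcp14_ps(x);
 *   y = _mm512_mul_ps(y, _mm512_sub_ps(_mm512_set1_ps(2.0f),
 *                                      _mm512_mul_ps(x, y)));
 */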
  return (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)__A,

  return (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)__A,

  return (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)__A,

  return (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)__A,
  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

  return (__m512i)__builtin_ia32_pabsd512((__v16si)__A);

  return (__m512i)__builtin_ia32_selectd_512(__U,

  return (__m512i)__builtin_ia32_selectd_512(__U,
  return __builtin_ia32_selectss_128(__U, __A, __W);

#define _mm_add_round_ss(A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

  return __builtin_ia32_selectsd_128(__U, __A, __W);
#define _mm_add_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

#define _mm512_add_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_add_round_ps(A, B, R) \
  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

  return __builtin_ia32_selectss_128(__U, __A, __W);
#define _mm_sub_round_ss(A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

  return __builtin_ia32_selectsd_128(__U, __A, __W);

#define _mm_sub_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

#define _mm512_sub_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_sub_round_ps(A, B, R) \
  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

  return __builtin_ia32_selectss_128(__U, __A, __W);
#define _mm_mul_round_ss(A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

  return __builtin_ia32_selectsd_128(__U, __A, __W);

#define _mm_mul_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

#define _mm512_mul_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_mul_round_ps(A, B, R) \
  (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

  return __builtin_ia32_selectss_128(__U, __A, __W);
#define _mm_div_round_ss(A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

  return __builtin_ia32_selectsd_128(__U, __A, __W);

#define _mm_div_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

  return (__m512d)((__v8df)__a / (__v8df)__b);

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512)((__v16sf)__a / (__v16sf)__b);

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
#define _mm512_div_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_div_round_ps(A, B, R) \
  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

#define _mm512_roundscale_ps(A, B) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R))

#define _mm512_roundscale_round_ps(A, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R))

#define _mm512_roundscale_pd(A, B) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R))

#define _mm512_roundscale_round_pd(A, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)__A,
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)__A,
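/* Illustrative note, not part of the original header: every fused
 * multiply-add variant funnels into the same vfmadd builtin; fmsub negates
 * the addend and fnmadd negates a multiplicand, so
 *   _mm512_fmsub_round_pd(A, B, C, R)
 * expands to
 *   __builtin_ia32_vfmaddpd512_mask((__v8df)A, (__v8df)B, -(__v8df)C,
 *                                   (__mmask8)-1, (int)(R));
 */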
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))

#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)__A,
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))

  return (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)__A,
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)__A,

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

  return (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)__A,

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)__A,

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))

  return (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)__A,

#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           -(__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A,

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A,

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           -(__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

  return (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)__A,

  return (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)__A,

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

  return (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)__A,

  return (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)__A,
  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si)__I,

  return (__m512i)__builtin_ia32_selectd_512(__U,

  return (__m512i)__builtin_ia32_selectd_512(__U,

  return (__m512i)__builtin_ia32_selectd_512(__U,

  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di)__I,

  return (__m512i)__builtin_ia32_selectq_512(__U,

  return (__m512i)__builtin_ia32_selectq_512(__U,

  return (__m512i)__builtin_ia32_selectq_512(__U,
#define _mm512_alignr_epi64(A, B, I) \
  (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                    (__v8di)(__m512i)(B), (int)(I))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512())

#define _mm512_alignr_epi32(A, B, I) \
  (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                    (__v16si)(__m512i)(B), (int)(I))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_extractf64x4_pd(A, I) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                            (__v4df)_mm256_undefined_pd(), \
                                            (__mmask8)-1)

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U))

#define _mm512_extractf32x4_ps(A, I) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                           (__v4sf)_mm_undefined_ps(), \
                                           (__mmask8)-1)

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U))

  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
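/* Illustrative note, not part of the original header: the four select
 * builtins above implement the mask_mov (vector blend) family, e.g.
 *   __m512 r = _mm512_mask_mov_ps(fallback, m, v);
 *   // r[i] = (m >> i) & 1 ? v[i] : fallback[i]
 */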
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
  _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
  _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)

/* Conversion */

#define _mm512_cvtt_roundps_epu32(A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R))
  return (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)__A,

  return (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)__A,

  return (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)__A,

#define _mm512_cvt_roundepi32_ps(A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_cvt_roundepu32_ps(A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))

  return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3604 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3612 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3620 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3626 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3634 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3654 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3660 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3668 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepu32_pd(__m256i __A)
{
  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                              (__v8df)_mm512_cvtepu32_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
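/* Usage sketch (illustrative only; assumes <immintrin.h>): the epu32
   conversions above treat the source lanes as unsigned before converting,
   so eight unsigned 32-bit values become eight doubles losslessly:

     __m256i u = _mm256_set1_epi32(-1);          // 0xFFFFFFFF in each lane
     __m512d d = _mm512_cvtepu32_pd(u);          // each lane == 4294967295.0
*/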
#define _mm512_cvt_roundpd_ps(A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_ps (__m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                  (__v8sf) _mm256_undefined_ps (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                  (__v8sf) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                  (__v8sf) _mm256_setzero_ps (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_pslo (__m512d __A)
{
  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U, __m512d __A)
{
  return (__m512) __builtin_shufflevector (
                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                               __U, __A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
#define _mm512_cvt_roundps_ph(A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1)

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W))

#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph

#define _mm512_cvt_roundph_ps(A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtph_ps(__m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                  (__v16sf) _mm512_setzero_ps (),
                  (__mmask16) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                  (__v16sf) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                  (__v16sf) _mm512_setzero_ps (),
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
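/* Usage sketch (illustrative only): _mm512_cvtps_ph takes a rounding-mode
   immediate and narrows sixteen floats to half precision in a __m256i;
   _mm512_cvtph_ps widens them back, so a round trip only loses bits that
   do not fit in fp16:

     __m512  f  = _mm512_set1_ps(1.5f);
     __m256i h  = _mm512_cvtps_ph(f, _MM_FROUND_TO_NEAREST_INT);
     __m512  f2 = _mm512_cvtph_ps(h);            // 1.5f survives exactly
*/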
#define _mm512_cvtt_roundpd_epi32(A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))

static __inline __m256i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi32(__m512d __a)
{
  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
                  (__v8si)_mm256_setzero_si256(),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                   (__v8si) __W,
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                   (__v8si) _mm256_setzero_si256 (),
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvtt_roundps_epi32(A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi32(__m512 __a)
{
  return (__m512i)
    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
                                     (__v16si) _mm512_setzero_si512 (),
                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                   (__v16si) __W,
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                   (__v16si) _mm512_setzero_si512 (),
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvt_roundps_epi32(A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                   (__v16si) _mm512_undefined_epi32 (),
                   (__mmask16) -1,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                   (__v16si) __W,
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                   (__v16si) _mm512_setzero_si512 (),
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvt_roundpd_epi32(A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                   (__v8si) _mm256_undefined_si256 (),
                   (__mmask8) -1,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                   (__v8si) __W,
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                   (__v8si) _mm256_setzero_si256 (),
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvt_roundps_epu32(A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                   (__v16si) _mm512_undefined_epi32 (),
                   (__mmask16) -1,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                   (__v16si) __W,
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                   (__v16si) _mm512_setzero_si512 (),
                   (__mmask16) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvt_roundpd_epu32(A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                   (__v8si) _mm256_undefined_si256 (),
                   (__mmask8) -1,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                   (__v8si) __W,
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                   (__v8si) _mm256_setzero_si256 (),
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
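/* Note (illustrative only): the cvtt_* forms above always truncate toward
   zero, while the cvt_* forms honor the rounding argument (or the current
   rounding direction). With 2.7f in every lane:

     __m512i t = _mm512_cvttps_epi32(_mm512_set1_ps(2.7f));  // 2 per lane
     __m512i r = _mm512_cvtps_epi32(_mm512_set1_ps(2.7f));   // 3 per lane
                                                             // (round-to-nearest)
*/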
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                          (__v8df)_mm512_unpackhi_pd(__A, __B),
                                          (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                          (__v8df)_mm512_unpackhi_pd(__A, __B),
                                          (__v8df)_mm512_setzero_pd());
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                          (__v8df)_mm512_unpacklo_pd(__A, __B),
                                          (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                          (__v8df)_mm512_unpacklo_pd(__A, __B),
                                          (__v8df)_mm512_setzero_pd());
}
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_unpackhi_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         2,    18,    3,    19,
                                         2+4,  18+4,  3+4,  19+4,
                                         2+8,  18+8,  3+8,  19+8,
                                         2+12, 18+12, 3+12, 19+12);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_unpacklo_ps(__m512 __a, __m512 __b)
{
  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
                                         0,    16,    1,    17,
                                         0+4,  16+4,  1+4,  17+4,
                                         0+8,  16+8,  1+8,  17+8,
                                         0+12, 16+12, 1+12, 17+12);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
                                          (__v16sf)_mm512_setzero_ps());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
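/* Usage sketch (illustrative only): the shuffle indices above interleave
   within each 128-bit lane, so unpacklo_epi32 pairs elements 0 and 1 of a
   lane of __A with elements 0 and 1 of the same lane of __B:

     __m512i x  = _mm512_set1_epi32(1);
     __m512i y  = _mm512_set1_epi32(2);
     __m512i lo = _mm512_unpacklo_epi32(x, y);   // 1,2,1,2,... in each lane
*/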
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_si512 (void const *__P)
{
  struct __loadu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_si512*)__P)->__v;
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi32 (void const *__P)
{
  struct __loadu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_epi32*)__P)->__v;
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                   (__v16si) __W,
                   (__mmask16) __U);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
                   (__v16si) _mm512_setzero_si512 (),
                   (__mmask16) __U);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi64 (void const *__P)
{
  struct __loadu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_epi64*)__P)->__v;
}
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
                   (__v8di) __W,
                   (__mmask8) __U);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
                   (__v8di) _mm512_setzero_si512 (),
                   (__mmask8) __U);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                  (__v16sf) __W,
                  (__mmask16) __U);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
                  (__v16sf) _mm512_setzero_ps (),
                  (__mmask16) __U);
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                   (__v8df) __W,
                   (__mmask8) __U);
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
                   (__v8df) _mm512_setzero_pd (),
                   (__mmask8) __U);
}
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_loadu_pd(void const *__p)
{
  struct __loadu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_pd*)__p)->__v;
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_loadu_ps(void const *__p)
{
  struct __loadu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_ps*)__p)->__v;
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_load_ps(void const *__p)
{
  return *(__m512*)__p;
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                  (__v16sf) __W,
                  (__mmask16) __U);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
                  (__v16sf) _mm512_setzero_ps (),
                  (__mmask16) __U);
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_load_pd(void const *__p)
{
  return *(__m512d*)__p;
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                   (__v8df) __W,
                   (__mmask8) __U);
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
                   (__v8df) _mm512_setzero_pd (),
                   (__mmask8) __U);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  struct __storeu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  struct __storeu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                     (__mmask16) __U);
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
  struct __storeu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__P)->__v = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
  struct __storeu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__P)->__v = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
  return __builtin_ia32_knothi(__M);
}
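/* Usage sketch (illustrative only): the loadu/storeu family tolerates any
   alignment (via the __aligned__(1) unaligned vector types), while
   load/store require 64-byte alignment; the masked forms touch only the
   selected lanes, which makes loop remainders straightforward:

     float buf[16] = {0};
     __mmask16 tail = (__mmask16)((1u << 5) - 1);     // first 5 lanes
     __m512 v = _mm512_maskz_loadu_ps(tail, buf);     // other lanes zeroed
     _mm512_mask_storeu_ps(buf, tail, v);             // other lanes untouched
*/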
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi8_epi32(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
}
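/* Usage sketch (illustrative only): the aliases above spell out the
   _MM_CMPINT_* predicates; a less-than compare yields a mask that can then
   zero the non-matching lanes:

     __m512i a = _mm512_set1_epi32(3);
     __m512i b = _mm512_set1_epi32(5);
     __mmask16 k = _mm512_cmplt_epi32_mask(a, b);     // k == 0xFFFF
     __m512i r = _mm512_maskz_mov_epi32(k, a);        // keep where set, else 0
*/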
4718 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4726 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4736 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4742 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4750 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4758 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4764 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4772 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4780 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4786 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4794 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4808 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4816 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4830 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4838 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4852 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4860 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4868 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4874 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4882 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4890 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4896 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4904 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4918 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4926 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4934 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4940 return (__m512i)__builtin_ia32_selectd_512(__U,
4948 return (__m512i)__builtin_ia32_selectd_512(__U,
4956 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4962 return (__m512i)__builtin_ia32_selectq_512(__U,
4970 return (__m512i)__builtin_ia32_selectq_512(__U,
#define _mm512_cmp_epi32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1)

#define _mm512_cmp_epu32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1)

#define _mm512_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1)

#define _mm512_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1)

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m))

#define _mm512_rol_epi32(a, b) \
  (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_rol_epi32(U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_rol_epi64(a, b) \
  (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_rol_epi64(U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)_mm512_setzero_si512())

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
}
5052 return (__m512i)__builtin_ia32_selectd_512(__U,
5060 return (__m512i)__builtin_ia32_selectd_512(__U,
5068 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5074 return (__m512i)__builtin_ia32_selectq_512(__U,
5082 return (__m512i)__builtin_ia32_selectq_512(__U,
#define _mm512_ror_epi32(A, B) \
  (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_ror_epi32((A), (B)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_ror_epi32(U, A, B) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_ror_epi32((A), (B)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_ror_epi64(A, B) \
  (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_ror_epi64((A), (B)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_ror_epi64(U, A, B) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_ror_epi64((A), (B)), \
                                      (__v8di)_mm512_setzero_si512())

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_slli_epi32(__m512i __A, int __B)
{
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
}
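/* Usage sketch (illustrative only): rol/ror take an immediate count while
   rolv/rorv take a per-lane count vector; rotating each 32-bit lane left
   by 8 moves every byte up one position:

     __m512i v = _mm512_set1_epi32(0x00112233);
     __m512i r = _mm512_rol_epi32(v, 8);          // 0x11223300 in each lane
*/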
5122 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5129 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5137 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5143 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5151 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5159 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5165 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5172 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5180 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5186 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5194 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5202 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5210 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5219 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5226 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5234 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5242 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5250 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5258 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5266 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5275 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_movedup_pd (__m512d __A)
{
  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                          0, 0, 2, 2, 4, 4, 6, 6);
}
5289 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5297 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5302 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ 5303 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5304 (__v8df)(__m512d)(B), \ 5305 (__v8di)(__m512i)(C), (int)(imm), \ 5306 (__mmask8)-1, (int)(R)) 5308 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ 5309 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5310 (__v8df)(__m512d)(B), \ 5311 (__v8di)(__m512i)(C), (int)(imm), \ 5312 (__mmask8)(U), (int)(R)) 5314 #define _mm512_fixupimm_pd(A, B, C, imm) \ 5315 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5316 (__v8df)(__m512d)(B), \ 5317 (__v8di)(__m512i)(C), (int)(imm), \ 5319 _MM_FROUND_CUR_DIRECTION) 5321 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ 5322 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5323 (__v8df)(__m512d)(B), \ 5324 (__v8di)(__m512i)(C), (int)(imm), \ 5326 _MM_FROUND_CUR_DIRECTION) 5328 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ 5329 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5330 (__v8df)(__m512d)(B), \ 5331 (__v8di)(__m512i)(C), \ 5332 (int)(imm), (__mmask8)(U), \ 5335 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ 5336 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5337 (__v8df)(__m512d)(B), \ 5338 (__v8di)(__m512i)(C), \ 5339 (int)(imm), (__mmask8)(U), \ 5340 _MM_FROUND_CUR_DIRECTION) 5342 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ 5343 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5344 (__v16sf)(__m512)(B), \ 5345 (__v16si)(__m512i)(C), (int)(imm), \ 5346 (__mmask16)-1, (int)(R)) 5348 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ 5349 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5350 (__v16sf)(__m512)(B), \ 5351 (__v16si)(__m512i)(C), (int)(imm), \ 5352 (__mmask16)(U), (int)(R)) 5354 #define _mm512_fixupimm_ps(A, B, C, imm) \ 5355 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5356 (__v16sf)(__m512)(B), \ 5357 (__v16si)(__m512i)(C), (int)(imm), \ 5359 _MM_FROUND_CUR_DIRECTION) 5361 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ 5362 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5363 (__v16sf)(__m512)(B), \ 5364 (__v16si)(__m512i)(C), (int)(imm), \ 5366 _MM_FROUND_CUR_DIRECTION) 5368 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ 5369 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5370 (__v16sf)(__m512)(B), \ 5371 (__v16si)(__m512i)(C), \ 5372 (int)(imm), (__mmask16)(U), \ 5375 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ 5376 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5377 (__v16sf)(__m512)(B), \ 5378 (__v16si)(__m512i)(C), \ 5379 (int)(imm), (__mmask16)(U), \ 5380 _MM_FROUND_CUR_DIRECTION) 5382 #define _mm_fixupimm_round_sd(A, B, C, imm, R) \ 5383 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5384 (__v2df)(__m128d)(B), \ 5385 (__v2di)(__m128i)(C), (int)(imm), \ 5386 (__mmask8)-1, (int)(R)) 5388 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ 5389 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5390 (__v2df)(__m128d)(B), \ 5391 (__v2di)(__m128i)(C), (int)(imm), \ 5392 (__mmask8)(U), (int)(R)) 5394 #define _mm_fixupimm_sd(A, B, C, imm) \ 5395 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5396 (__v2df)(__m128d)(B), \ 5397 (__v2di)(__m128i)(C), (int)(imm), \ 5399 _MM_FROUND_CUR_DIRECTION) 5401 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ 5402 
(__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5403 (__v2df)(__m128d)(B), \ 5404 (__v2di)(__m128i)(C), (int)(imm), \ 5406 _MM_FROUND_CUR_DIRECTION) 5408 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ 5409 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5410 (__v2df)(__m128d)(B), \ 5411 (__v2di)(__m128i)(C), (int)(imm), \ 5412 (__mmask8)(U), (int)(R)) 5414 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ 5415 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5416 (__v2df)(__m128d)(B), \ 5417 (__v2di)(__m128i)(C), (int)(imm), \ 5419 _MM_FROUND_CUR_DIRECTION) 5421 #define _mm_fixupimm_round_ss(A, B, C, imm, R) \ 5422 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5423 (__v4sf)(__m128)(B), \ 5424 (__v4si)(__m128i)(C), (int)(imm), \ 5425 (__mmask8)-1, (int)(R)) 5427 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ 5428 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5429 (__v4sf)(__m128)(B), \ 5430 (__v4si)(__m128i)(C), (int)(imm), \ 5431 (__mmask8)(U), (int)(R)) 5433 #define _mm_fixupimm_ss(A, B, C, imm) \ 5434 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5435 (__v4sf)(__m128)(B), \ 5436 (__v4si)(__m128i)(C), (int)(imm), \ 5438 _MM_FROUND_CUR_DIRECTION) 5440 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ 5441 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5442 (__v4sf)(__m128)(B), \ 5443 (__v4si)(__m128i)(C), (int)(imm), \ 5445 _MM_FROUND_CUR_DIRECTION) 5447 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ 5448 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5449 (__v4sf)(__m128)(B), \ 5450 (__v4si)(__m128i)(C), (int)(imm), \ 5451 (__mmask8)(U), (int)(R)) 5453 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ 5454 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5455 (__v4sf)(__m128)(B), \ 5456 (__v4si)(__m128i)(C), (int)(imm), \ 5458 _MM_FROUND_CUR_DIRECTION) 5460 #define _mm_getexp_round_sd(A, B, R) \ 5461 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5462 (__v2df)(__m128d)(B), \ 5463 (__v2df)_mm_setzero_pd(), \ 5464 (__mmask8)-1, (int)(R)) 5470 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((
__v2df) __A,
5477 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (
__v2df) __A,
5484 #define _mm_mask_getexp_round_sd(W, U, A, B, R) \ 5485 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5486 (__v2df)(__m128d)(B), \ 5487 (__v2df)(__m128d)(W), \ 5488 (__mmask8)(U), (int)(R)) 5493 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (
__v2df) __A,
5500 #define _mm_maskz_getexp_round_sd(U, A, B, R) \ 5501 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5502 (__v2df)(__m128d)(B), \ 5503 (__v2df)_mm_setzero_pd(), \ 5504 (__mmask8)(U), (int)(R)) 5506 #define _mm_getexp_round_ss(A, B, R) \ 5507 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5508 (__v4sf)(__m128)(B), \ 5509 (__v4sf)_mm_setzero_ps(), \ 5510 (__mmask8)-1, (int)(R)) 5515 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5522 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5529 #define _mm_mask_getexp_round_ss(W, U, A, B, R) \ 5530 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5531 (__v4sf)(__m128)(B), \ 5532 (__v4sf)(__m128)(W), \ 5533 (__mmask8)(U), (int)(R)) 5538 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5545 #define _mm_maskz_getexp_round_ss(U, A, B, R) \ 5546 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5547 (__v4sf)(__m128)(B), \ 5548 (__v4sf)_mm_setzero_ps(), \ 5549 (__mmask8)(U), (int)(R)) 5551 #define _mm_getmant_round_sd(A, B, C, D, R) \ 5552 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5553 (__v2df)(__m128d)(B), \ 5554 (int)(((D)<<2) | (C)), \ 5555 (__v2df)_mm_setzero_pd(), \ 5556 (__mmask8)-1, (int)(R)) 5558 #define _mm_getmant_sd(A, B, C, D) \ 5559 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5560 (__v2df)(__m128d)(B), \ 5561 (int)(((D)<<2) | (C)), \ 5562 (__v2df)_mm_setzero_pd(), \ 5564 _MM_FROUND_CUR_DIRECTION) 5566 #define _mm_mask_getmant_sd(W, U, A, B, C, D) \ 5567 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5568 (__v2df)(__m128d)(B), \ 5569 (int)(((D)<<2) | (C)), \ 5570 (__v2df)(__m128d)(W), \ 5572 _MM_FROUND_CUR_DIRECTION) 5574 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ 5575 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5576 (__v2df)(__m128d)(B), \ 5577 (int)(((D)<<2) | (C)), \ 5578 (__v2df)(__m128d)(W), \ 5579 (__mmask8)(U), (int)(R)) 5581 #define _mm_maskz_getmant_sd(U, A, B, C, D) \ 5582 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5583 (__v2df)(__m128d)(B), \ 5584 (int)(((D)<<2) | (C)), \ 5585 (__v2df)_mm_setzero_pd(), \ 5587 _MM_FROUND_CUR_DIRECTION) 5589 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ 5590 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5591 (__v2df)(__m128d)(B), \ 5592 (int)(((D)<<2) | (C)), \ 5593 (__v2df)_mm_setzero_pd(), \ 5594 (__mmask8)(U), (int)(R)) 5596 #define _mm_getmant_round_ss(A, B, C, D, R) \ 5597 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5598 (__v4sf)(__m128)(B), \ 5599 (int)(((D)<<2) | (C)), \ 5600 (__v4sf)_mm_setzero_ps(), \ 5601 (__mmask8)-1, (int)(R)) 5603 #define _mm_getmant_ss(A, B, C, D) \ 5604 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5605 (__v4sf)(__m128)(B), \ 5606 (int)(((D)<<2) | (C)), \ 5607 (__v4sf)_mm_setzero_ps(), \ 5609 _MM_FROUND_CUR_DIRECTION) 5611 #define _mm_mask_getmant_ss(W, U, A, B, C, D) \ 5612 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5613 (__v4sf)(__m128)(B), \ 5614 (int)(((D)<<2) | (C)), \ 5615 (__v4sf)(__m128)(W), \ 5617 _MM_FROUND_CUR_DIRECTION) 5619 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ 5620 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5621 (__v4sf)(__m128)(B), \ 5622 (int)(((D)<<2) | (C)), \ 5623 (__v4sf)(__m128)(W), \ 5624 (__mmask8)(U), (int)(R)) 5626 #define _mm_maskz_getmant_ss(U, A, B, C, D) \ 5627 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5628 (__v4sf)(__m128)(B), \ 5629 (int)(((D)<<2) | (C)), \ 5630 (__v4sf)_mm_setzero_ps(), \ 5632 _MM_FROUND_CUR_DIRECTION) 5634 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ 5635 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5636 (__v4sf)(__m128)(B), \ 5637 (int)(((D)<<2) | (C)), \ 5638 (__v4sf)_mm_setzero_ps(), \ 5639 (__mmask8)(U), (int)(R)) 5647 #define _mm_comi_round_sd(A, B, P, R) \ 5648 (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 5651 #define _mm_comi_round_ss(A, B, P, R) \ 5652 (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 5656 #define _mm_cvt_roundsd_si64(A, R) \ 5657 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) 5663 return 
(__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5669 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5677 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5691 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5699 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5707 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5713 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5721 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5729 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5735 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5743 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5757 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5765 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5779 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5787 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5795 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5801 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5809 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5817 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5823 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5831 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5845 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5853 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5867 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5875 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5883 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5889 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5897 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5905 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5911 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5919 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1)

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1)

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U))

#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
#endif

#define _mm_cvt_roundsd_si32(A, R) \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvt_roundsd_i32(A, R) \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvt_roundsd_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvt_roundsd_u64(A, R) \
  (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                  (int)(R))

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvt_roundss_si32(A, R) \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))

#define _mm_cvt_roundss_i32(A, R) \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))

#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))

#define _mm_cvt_roundss_i64(A, R) \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
#endif

#define _mm_cvt_roundss_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvtss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvt_roundss_u64(A, R) \
  (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                  (int)(R))

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvtss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvtt_roundsd_i32(A, R) \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvtt_roundsd_si32(A, R) \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))

static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttsd_i32 (__m128d __A)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundsd_si64(A, R) \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvtt_roundsd_i64(A, R) \
  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))

static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttsd_i64 (__m128d __A)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvtt_roundsd_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundsd_u64(A, R) \
  (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R))

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvtt_roundss_i32(A, R) \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))

#define _mm_cvtt_roundss_si32(A, R) \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))

static __inline__ int __DEFAULT_FN_ATTRS
_mm_cvttss_i32 (__m128 __A)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundss_i64(A, R) \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))

#define _mm_cvtt_roundss_si64(A, R) \
  (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))

static __inline__ long long __DEFAULT_FN_ATTRS
_mm_cvttss_i64 (__m128 __A)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm_cvtt_roundss_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))

static __inline__ unsigned __DEFAULT_FN_ATTRS
_mm_cvttss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
#define _mm_cvtt_roundss_u64(A, R) \
  (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R))

static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mm_cvttss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}
#endif

#define _mm512_permute_pd(X, C) \
  (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))

#define _mm512_mask_permute_pd(W, U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_permute_pd(U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_permute_ps(X, C) \
  (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))

#define _mm512_mask_permute_ps(W, U, X, C) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_permute_ps(U, X, C) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)_mm512_setzero_ps())

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}
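/* Usage sketch (illustrative only): the ternarylogic immediate above is a
   truth table indexed by the bit triple (a,b,c); 0x96 encodes a ^ b ^ c,
   so, given __m512i a, b, c, a three-way XOR is a single instruction:

     __m512i parity = _mm512_ternarylogic_epi32(a, b, c, 0x96);  // a ^ b ^ c
*/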
6172 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6180 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6188 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6194 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6202 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6210 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6217 return (__m512d)__builtin_ia32_selectpd_512(__U,
6226 return (__m512d)__builtin_ia32_selectpd_512(__U,
6228 (__v8df)(__m512d)__I);
6235 return (__m512d)__builtin_ia32_selectpd_512(__U,
6243 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6250 return (__m512)__builtin_ia32_selectps_512(__U,
6258 return (__m512)__builtin_ia32_selectps_512(__U,
6260 (__v16sf)(__m512)__I);
6266 return (__m512)__builtin_ia32_selectps_512(__U,
#define _mm512_cvtt_roundpd_epu32(A, R) \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R))

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                   (__v8si) _mm256_undefined_si256 (),
                   (__mmask8) -1,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                   (__v8si) __W,
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                   (__v8si) _mm256_setzero_si256 (),
                   (__mmask8) __U,
                   _MM_FROUND_CUR_DIRECTION);
}
6316 #define _mm_roundscale_round_sd(A, B, imm, R) \ 6317 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6318 (__v2df)(__m128d)(B), \ 6319 (__v2df)_mm_setzero_pd(), \ 6320 (__mmask8)-1, (int)(imm), \ 6323 #define _mm_roundscale_sd(A, B, imm) \ 6324 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6325 (__v2df)(__m128d)(B), \ 6326 (__v2df)_mm_setzero_pd(), \ 6327 (__mmask8)-1, (int)(imm), \ 6328 _MM_FROUND_CUR_DIRECTION) 6330 #define _mm_mask_roundscale_sd(W, U, A, B, imm) \ 6331 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6332 (__v2df)(__m128d)(B), \ 6333 (__v2df)(__m128d)(W), \ 6334 (__mmask8)(U), (int)(imm), \ 6335 _MM_FROUND_CUR_DIRECTION) 6337 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ 6338 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6339 (__v2df)(__m128d)(B), \ 6340 (__v2df)(__m128d)(W), \ 6341 (__mmask8)(U), (int)(I), \ 6344 #define _mm_maskz_roundscale_sd(U, A, B, I) \ 6345 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6346 (__v2df)(__m128d)(B), \ 6347 (__v2df)_mm_setzero_pd(), \ 6348 (__mmask8)(U), (int)(I), \ 6349 _MM_FROUND_CUR_DIRECTION) 6351 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ 6352 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6353 (__v2df)(__m128d)(B), \ 6354 (__v2df)_mm_setzero_pd(), \ 6355 (__mmask8)(U), (int)(I), \ 6358 #define _mm_roundscale_round_ss(A, B, imm, R) \ 6359 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6360 (__v4sf)(__m128)(B), \ 6361 (__v4sf)_mm_setzero_ps(), \ 6362 (__mmask8)-1, (int)(imm), \ 6365 #define _mm_roundscale_ss(A, B, imm) \ 6366 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6367 (__v4sf)(__m128)(B), \ 6368 (__v4sf)_mm_setzero_ps(), \ 6369 (__mmask8)-1, (int)(imm), \ 6370 _MM_FROUND_CUR_DIRECTION) 6372 #define _mm_mask_roundscale_ss(W, U, A, B, I) \ 6373 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6374 (__v4sf)(__m128)(B), \ 6375 (__v4sf)(__m128)(W), \ 6376 (__mmask8)(U), (int)(I), \ 6377 _MM_FROUND_CUR_DIRECTION) 6379 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ 6380 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6381 (__v4sf)(__m128)(B), \ 6382 (__v4sf)(__m128)(W), \ 6383 (__mmask8)(U), (int)(I), \ 6386 #define _mm_maskz_roundscale_ss(U, A, B, I) \ 6387 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6388 (__v4sf)(__m128)(B), \ 6389 (__v4sf)_mm_setzero_ps(), \ 6390 (__mmask8)(U), (int)(I), \ 6391 _MM_FROUND_CUR_DIRECTION) 6393 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ 6394 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6395 (__v4sf)(__m128)(B), \ 6396 (__v4sf)_mm_setzero_ps(), \ 6397 (__mmask8)(U), (int)(I), \ 6400 #define _mm512_scalef_round_pd(A, B, R) \ 6401 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6402 (__v8df)(__m512d)(B), \ 6403 (__v8df)_mm512_undefined_pd(), \ 6404 (__mmask8)-1, (int)(R)) 6406 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ 6407 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6408 (__v8df)(__m512d)(B), \ 6409 (__v8df)(__m512d)(W), \ 6410 (__mmask8)(U), (int)(R)) 6412 #define _mm512_maskz_scalef_round_pd(U, A, B, R) \ 6413 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6414 (__v8df)(__m512d)(B), \ 6415 (__v8df)_mm512_setzero_pd(), \ 6416 (__mmask8)(U), (int)(R)) 6421 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6432 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6442 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6450 #define _mm512_scalef_round_ps(A, B, R) \ 6451 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6452 (__v16sf)(__m512)(B), \ 6453 (__v16sf)_mm512_undefined_ps(), \ 6454 (__mmask16)-1, (int)(R)) 6456 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ 6457 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6458 (__v16sf)(__m512)(B), \ 6459 (__v16sf)(__m512)(W), \ 6460 (__mmask16)(U), (int)(R)) 6462 #define _mm512_maskz_scalef_round_ps(U, A, B, R) \ 6463 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6464 (__v16sf)(__m512)(B), \ 6465 (__v16sf)_mm512_setzero_ps(), \ 6466 (__mmask16)(U), (int)(R)) 6471 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6482 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6492 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6500 #define _mm_scalef_round_sd(A, B, R) \ 6501 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6502 (__v2df)(__m128d)(B), \ 6503 (__v2df)_mm_setzero_pd(), \ 6504 (__mmask8)-1, (int)(R)) 6509 return (__m128d) __builtin_ia32_scalefsd_round_mask ((
__v2df) __A,
6518 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (
__v2df) __A,
6525 #define _mm_mask_scalef_round_sd(W, U, A, B, R) \ 6526 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6527 (__v2df)(__m128d)(B), \ 6528 (__v2df)(__m128d)(W), \ 6529 (__mmask8)(U), (int)(R)) 6534 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (
__v2df) __A,
6541 #define _mm_maskz_scalef_round_sd(U, A, B, R) \ 6542 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6543 (__v2df)(__m128d)(B), \ 6544 (__v2df)_mm_setzero_pd(), \ 6545 (__mmask8)(U), (int)(R)) 6547 #define _mm_scalef_round_ss(A, B, R) \ 6548 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6549 (__v4sf)(__m128)(B), \ 6550 (__v4sf)_mm_setzero_ps(), \ 6551 (__mmask8)-1, (int)(R)) 6556 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6565 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6572 #define _mm_mask_scalef_round_ss(W, U, A, B, R) \ 6573 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6574 (__v4sf)(__m128)(B), \ 6575 (__v4sf)(__m128)(W), \ 6576 (__mmask8)(U), (int)(R)) 6581 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6588 #define _mm_maskz_scalef_round_ss(U, A, B, R) \ 6589 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6590 (__v4sf)(__m128)(B), \ 6591 (__v4sf)_mm_setzero_ps(), \ 6598 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6604 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6611 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6619 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6625 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6633 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6638 #define _mm512_shuffle_f32x4(A, B, imm) \ 6639 (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ 6640 (__v16sf)(__m512)(B), (int)(imm)) 6642 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ 6643 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6644 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6645 (__v16sf)(__m512)(W)) 6647 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ 6648 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6649 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6650 (__v16sf)_mm512_setzero_ps()) 6652 #define _mm512_shuffle_f64x2(A, B, imm) \ 6653 (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ 6654 (__v8df)(__m512d)(B), (int)(imm)) 6656 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ 6657 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6658 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6659 (__v8df)(__m512d)(W)) 6661 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ 6662 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6663 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6664 (__v8df)_mm512_setzero_pd()) 6666 #define _mm512_shuffle_i32x4(A, B, imm) \ 6667 (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ 6668 (__v16si)(__m512i)(B), (int)(imm)) 6670 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ 6671 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 6672 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6673 (__v16si)(__m512i)(W)) 6675 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ 6676 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 6677 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6678 (__v16si)_mm512_setzero_si512()) 6680 #define _mm512_shuffle_i64x2(A, B, imm) \ 6681 (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ 6682 (__v8di)(__m512i)(B), (int)(imm)) 6684 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ 6685 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 6686 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6687 (__v8di)(__m512i)(W)) 6689 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ 6690 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 6691 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6692 (__v8di)_mm512_setzero_si512()) 6694 #define _mm512_shuffle_pd(A, B, M) \ 6695 (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ 6696 (__v8df)(__m512d)(B), (int)(M)) 6698 #define _mm512_mask_shuffle_pd(W, U, A, B, M) \ 6699 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6700 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6701 (__v8df)(__m512d)(W)) 6703 #define _mm512_maskz_shuffle_pd(U, A, B, M) \ 6704 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6705 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6706 (__v8df)_mm512_setzero_pd()) 6708 #define _mm512_shuffle_ps(A, B, M) \ 6709 (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ 6710 (__v16sf)(__m512)(B), (int)(M)) 6712 #define _mm512_mask_shuffle_ps(W, U, A, B, M) \ 6713 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6714 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6715 (__v16sf)(__m512)(W)) 6717 #define _mm512_maskz_shuffle_ps(U, A, B, M) \ 6718 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6719 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6720 (__v16sf)_mm512_setzero_ps()) 6722 #define _mm_sqrt_round_sd(A, B, R) \ 6723 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6724 (__v2df)(__m128d)(B), \ 6725 (__v2df)_mm_setzero_pd(), \ 6726 (__mmask8)-1, (int)(R)) 6731 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (
__v2df) __A,
6738 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ 6739 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6740 (__v2df)(__m128d)(B), \ 6741 (__v2df)(__m128d)(W), \ 6742 (__mmask8)(U), (int)(R)) 6747 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (
__v2df) __A,
6754 #define _mm_maskz_sqrt_round_sd(U, A, B, R) \ 6755 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6756 (__v2df)(__m128d)(B), \ 6757 (__v2df)_mm_setzero_pd(), \ 6758 (__mmask8)(U), (int)(R)) 6760 #define _mm_sqrt_round_ss(A, B, R) \ 6761 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6762 (__v4sf)(__m128)(B), \ 6763 (__v4sf)_mm_setzero_ps(), \ 6764 (__mmask8)-1, (int)(R)) 6769 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6776 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ 6777 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6778 (__v4sf)(__m128)(B), \ 6779 (__v4sf)(__m128)(W), (__mmask8)(U), \ 6785 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6792 #define _mm_maskz_sqrt_round_ss(U, A, B, R) \ 6793 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6794 (__v4sf)(__m128)(B), \ 6795 (__v4sf)_mm_setzero_ps(), \ 6796 (__mmask8)(U), (int)(R)) 6801 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6802 0, 1, 2, 3, 0, 1, 2, 3,
6803 0, 1, 2, 3, 0, 1, 2, 3);
6809 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6817 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6825 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
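/* Usage sketch (illustrative only): broadcast_f32x4 tiles one 128-bit
   vector across all four 128-bit lanes of the 512-bit result:

     __m128 q = _mm_setr_ps(0.f, 1.f, 2.f, 3.f);
     __m512 t = _mm512_broadcast_f32x4(q);        // 0,1,2,3, 0,1,2,3, ...
*/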
6826 0, 1, 2, 3, 0, 1, 2, 3);
6832 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6840 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6849 0, 1, 2, 3, 0, 1, 2, 3,
6850 0, 1, 2, 3, 0, 1, 2, 3);
6856 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6864 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6872 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6873 0, 1, 2, 3, 0, 1, 2, 3);
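/* Usage sketch (illustrative; hypothetical helper name): the shufflevector
 * above replicates a 256-bit source across the full register. The same
 * effect on a live __m512i can be had with the _mm512_shuffle_i64x2 macro
 * defined earlier: imm 0x44 repeats 128-bit lanes 0,1 of both sources. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
__demo_dup_low_256bits(__m512i __v)
{
  return _mm512_shuffle_i64x2(__v, __v, 0x44);
}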
6879 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6887 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6895 return (__m512d)__builtin_ia32_selectpd_512(__M,
6903 return (__m512d)__builtin_ia32_selectpd_512(__M,
6911 return (__m512)__builtin_ia32_selectps_512(__M,
6919 return (__m512)__builtin_ia32_selectps_512(__M,
6927 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6935 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6942 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
__builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6956 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6964 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6965 (__v16hi) __O, __M);
6971 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6979 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
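/* Usage sketch (assumption: the pmovsdw512mem builtin above backs the
 * standard _mm512_mask_cvtsepi32_storeu_epi16 intrinsic, which narrows
 * sixteen 32-bit lanes to 16 bits with signed saturation and stores only
 * the mask-selected elements):
 *
 *   short out[16];
 *   _mm512_mask_cvtsepi32_storeu_epi16(out, (__mmask16)0x00FF, v);
 */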
6985 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6993 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7000 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
__builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7014 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7022 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7029 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7037 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7043 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7051 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7058 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
__builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7072 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7080 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7088 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
__builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7102 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7110 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7118 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7126 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7132 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7140 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7148 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
__builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7162 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7170 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7177 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7185 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7191 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7199 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7206 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
__builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7220 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7228 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7235 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
__builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7249 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7257 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7258 (__v16hi) __O, __M);
7264 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7272 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7278 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7286 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7293 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
__builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7307 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7315 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7322 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7330 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7336 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7344 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7351 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
__builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7362 #define _mm512_extracti32x4_epi32(A, imm) \ 7363 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7364 (__v4si)_mm_undefined_si128(), \ 7367 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ 7368 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7369 (__v4si)(__m128i)(W), \ 7372 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ 7373 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7374 (__v4si)_mm_setzero_si128(), \ 7377 #define _mm512_extracti64x4_epi64(A, imm) \ 7378 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7379 (__v4di)_mm256_undefined_si256(), \ 7382 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ 7383 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7384 (__v4di)(__m256i)(W), \ 7387 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) \ 7388 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7389 (__v4di)_mm256_setzero_si256(), \ 7392 #define _mm512_insertf64x4(A, B, imm) \ 7393 (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ 7394 (__v4df)(__m256d)(B), (int)(imm)) 7396 #define _mm512_mask_insertf64x4(W, U, A, B, imm) \ 7397 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7398 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7399 (__v8df)(__m512d)(W)) 7401 #define _mm512_maskz_insertf64x4(U, A, B, imm) \ 7402 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7403 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7404 (__v8df)_mm512_setzero_pd()) 7406 #define _mm512_inserti64x4(A, B, imm) \ 7407 (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ 7408 (__v4di)(__m256i)(B), (int)(imm)) 7410 #define _mm512_mask_inserti64x4(W, U, A, B, imm) \ 7411 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7412 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7413 (__v8di)(__m512i)(W)) 7415 #define _mm512_maskz_inserti64x4(U, A, B, imm) \ 7416 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7417 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7418 (__v8di)_mm512_setzero_si512()) 7420 #define _mm512_insertf32x4(A, B, imm) \ 7421 (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ 7422 (__v4sf)(__m128)(B), (int)(imm)) 7424 #define _mm512_mask_insertf32x4(W, U, A, B, imm) \ 7425 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7426 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7427 (__v16sf)(__m512)(W)) 7429 #define _mm512_maskz_insertf32x4(U, A, B, imm) \ 7430 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7431 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7432 (__v16sf)_mm512_setzero_ps()) 7434 #define _mm512_inserti32x4(A, B, imm) \ 7435 (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ 7436 (__v4si)(__m128i)(B), (int)(imm)) 7438 #define _mm512_mask_inserti32x4(W, U, A, B, imm) \ 7439 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7440 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7441 (__v16si)(__m512i)(W)) 7443 #define _mm512_maskz_inserti32x4(U, A, B, imm) \ 7444 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7445 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7446 (__v16si)_mm512_setzero_si512()) 7448 #define _mm512_getmant_round_pd(A, B, C, R) \ 7449 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7450 (int)(((C)<<2) | (B)), \ 7451 (__v8df)_mm512_undefined_pd(), \ 7452 (__mmask8)-1, (int)(R)) 7454 #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ 7455 
(__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7456 (int)(((C)<<2) | (B)), \ 7457 (__v8df)(__m512d)(W), \ 7458 (__mmask8)(U), (int)(R)) 7460 #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ 7461 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7462 (int)(((C)<<2) | (B)), \ 7463 (__v8df)_mm512_setzero_pd(), \ 7464 (__mmask8)(U), (int)(R)) 7466 #define _mm512_getmant_pd(A, B, C) \ 7467 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7468 (int)(((C)<<2) | (B)), \ 7469 (__v8df)_mm512_setzero_pd(), \ 7471 _MM_FROUND_CUR_DIRECTION) 7473 #define _mm512_mask_getmant_pd(W, U, A, B, C) \ 7474 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7475 (int)(((C)<<2) | (B)), \ 7476 (__v8df)(__m512d)(W), \ 7478 _MM_FROUND_CUR_DIRECTION) 7480 #define _mm512_maskz_getmant_pd(U, A, B, C) \ 7481 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7482 (int)(((C)<<2) | (B)), \ 7483 (__v8df)_mm512_setzero_pd(), \ 7485 _MM_FROUND_CUR_DIRECTION) 7487 #define _mm512_getmant_round_ps(A, B, C, R) \ 7488 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7489 (int)(((C)<<2) | (B)), \ 7490 (__v16sf)_mm512_undefined_ps(), \ 7491 (__mmask16)-1, (int)(R)) 7493 #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ 7494 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7495 (int)(((C)<<2) | (B)), \ 7496 (__v16sf)(__m512)(W), \ 7497 (__mmask16)(U), (int)(R)) 7499 #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ 7500 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7501 (int)(((C)<<2) | (B)), \ 7502 (__v16sf)_mm512_setzero_ps(), \ 7503 (__mmask16)(U), (int)(R)) 7505 #define _mm512_getmant_ps(A, B, C) \ 7506 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7507 (int)(((C)<<2)|(B)), \ 7508 (__v16sf)_mm512_undefined_ps(), \ 7510 _MM_FROUND_CUR_DIRECTION) 7512 #define _mm512_mask_getmant_ps(W, U, A, B, C) \ 7513 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7514 (int)(((C)<<2)|(B)), \ 7515 (__v16sf)(__m512)(W), \ 7517 _MM_FROUND_CUR_DIRECTION) 7519 #define _mm512_maskz_getmant_ps(U, A, B, C) \ 7520 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7521 (int)(((C)<<2)|(B)), \ 7522 (__v16sf)_mm512_setzero_ps(), \ 7524 _MM_FROUND_CUR_DIRECTION) 7526 #define _mm512_getexp_round_pd(A, R) \ 7527 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7528 (__v8df)_mm512_undefined_pd(), \ 7529 (__mmask8)-1, (int)(R)) 7531 #define _mm512_mask_getexp_round_pd(W, U, A, R) \ 7532 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7533 (__v8df)(__m512d)(W), \ 7534 (__mmask8)(U), (int)(R)) 7536 #define _mm512_maskz_getexp_round_pd(U, A, R) \ 7537 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7538 (__v8df)_mm512_setzero_pd(), \ 7539 (__mmask8)(U), (int)(R)) 7544 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7553 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7562 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7568 #define _mm512_getexp_round_ps(A, R) \ 7569 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7570 (__v16sf)_mm512_undefined_ps(), \ 7571 (__mmask16)-1, (int)(R)) 7573 #define _mm512_mask_getexp_round_ps(W, U, A, R) \ 7574 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7575 (__v16sf)(__m512)(W), \ 7576 (__mmask16)(U), (int)(R)) 7578 #define _mm512_maskz_getexp_round_ps(U, A, R) \ 7579 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7580 (__v16sf)_mm512_setzero_ps(), \ 7581 (__mmask16)(U), (int)(R)) 7586 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7595 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7604 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7610 #define _mm512_i64gather_ps(index, addr, scale) \ 7611 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 7612 (void const *)(addr), \ 7613 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7616 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7617 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ 7618 (void const *)(addr), \ 7619 (__v8di)(__m512i)(index), \ 7620 (__mmask8)(mask), (int)(scale)) 7622 #define _mm512_i64gather_epi32(index, addr, scale) \ 7623 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ 7624 (void const *)(addr), \ 7625 (__v8di)(__m512i)(index), \ 7626 (__mmask8)-1, (int)(scale)) 7628 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 7629 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 7630 (void const *)(addr), \ 7631 (__v8di)(__m512i)(index), \ 7632 (__mmask8)(mask), (int)(scale)) 7634 #define _mm512_i64gather_pd(index, addr, scale) \ 7635 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 7636 (void const *)(addr), \ 7637 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7640 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7641 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 7642 (void const *)(addr), \ 7643 (__v8di)(__m512i)(index), \ 7644 (__mmask8)(mask), (int)(scale)) 7646 #define _mm512_i64gather_epi64(index, addr, scale) \ 7647 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ 7648 (void const *)(addr), \ 7649 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7652 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7653 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 7654 (void const *)(addr), \ 7655 (__v8di)(__m512i)(index), \ 7656 (__mmask8)(mask), (int)(scale)) 7658 #define _mm512_i32gather_ps(index, addr, scale) \ 7659 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 7660 (void const *)(addr), \ 7661 (__v16sf)(__m512)(index), \ 7662 (__mmask16)-1, (int)(scale)) 7664 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ 7665 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 7666 (void const *)(addr), \ 7667 (__v16sf)(__m512)(index), \ 7668 (__mmask16)(mask), (int)(scale)) 7670 #define _mm512_i32gather_epi32(index, addr, scale) \ 7671 (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 7672 (void const *)(addr), \ 7673 (__v16si)(__m512i)(index), \ 7674 (__mmask16)-1, (int)(scale)) 7676 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 7677 (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 7678 (void const *)(addr), \ 7679 (__v16si)(__m512i)(index), \ 7680 (__mmask16)(mask), (int)(scale)) 7682 #define _mm512_i32gather_pd(index, addr, scale) \ 7683 (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 7684 (void const *)(addr), \ 7685 (__v8si)(__m256i)(index), (__mmask8)-1, \ 7688 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \ 7689 (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 7690 (void const *)(addr), \ 7691 (__v8si)(__m256i)(index), \ 7692 (__mmask8)(mask), (int)(scale)) 7694 #define _mm512_i32gather_epi64(index, addr, scale) \ 7695 (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 7696 (void const *)(addr), \ 7697 (__v8si)(__m256i)(index), (__mmask8)-1, \ 7700 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 7701 
(__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 7702 (void const *)(addr), \ 7703 (__v8si)(__m256i)(index), \ 7704 (__mmask8)(mask), (int)(scale)) 7706 #define _mm512_i64scatter_ps(addr, index, v1, scale) \ 7707 __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \ 7708 (__v8di)(__m512i)(index), \ 7709 (__v8sf)(__m256)(v1), (int)(scale)) 7711 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 7712 __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \ 7713 (__v8di)(__m512i)(index), \ 7714 (__v8sf)(__m256)(v1), (int)(scale)) 7716 #define _mm512_i64scatter_epi32(addr, index, v1, scale) \ 7717 __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \ 7718 (__v8di)(__m512i)(index), \ 7719 (__v8si)(__m256i)(v1), (int)(scale)) 7721 #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 7722 __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \ 7723 (__v8di)(__m512i)(index), \ 7724 (__v8si)(__m256i)(v1), (int)(scale)) 7726 #define _mm512_i64scatter_pd(addr, index, v1, scale) \ 7727 __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \ 7728 (__v8di)(__m512i)(index), \ 7729 (__v8df)(__m512d)(v1), (int)(scale)) 7731 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 7732 __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \ 7733 (__v8di)(__m512i)(index), \ 7734 (__v8df)(__m512d)(v1), (int)(scale)) 7736 #define _mm512_i64scatter_epi64(addr, index, v1, scale) \ 7737 __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \ 7738 (__v8di)(__m512i)(index), \ 7739 (__v8di)(__m512i)(v1), (int)(scale)) 7741 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 7742 __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \ 7743 (__v8di)(__m512i)(index), \ 7744 (__v8di)(__m512i)(v1), (int)(scale)) 7746 #define _mm512_i32scatter_ps(addr, index, v1, scale) \ 7747 __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \ 7748 (__v16si)(__m512i)(index), \ 7749 (__v16sf)(__m512)(v1), (int)(scale)) 7751 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 7752 __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \ 7753 (__v16si)(__m512i)(index), \ 7754 (__v16sf)(__m512)(v1), (int)(scale)) 7756 #define _mm512_i32scatter_epi32(addr, index, v1, scale) \ 7757 __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \ 7758 (__v16si)(__m512i)(index), \ 7759 (__v16si)(__m512i)(v1), (int)(scale)) 7761 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 7762 __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \ 7763 (__v16si)(__m512i)(index), \ 7764 (__v16si)(__m512i)(v1), (int)(scale)) 7766 #define _mm512_i32scatter_pd(addr, index, v1, scale) \ 7767 __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \ 7768 (__v8si)(__m256i)(index), \ 7769 (__v8df)(__m512d)(v1), (int)(scale)) 7771 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 7772 __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \ 7773 (__v8si)(__m256i)(index), \ 7774 (__v8df)(__m512d)(v1), (int)(scale)) 7776 #define _mm512_i32scatter_epi64(addr, index, v1, scale) \ 7777 __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \ 7778 (__v8si)(__m256i)(index), \ 7779 (__v8di)(__m512i)(v1), (int)(scale)) 7781 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 7782 __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \ 7783 (__v8si)(__m256i)(index), \ 7784 (__v8di)(__m512i)(v1), (int)(scale)) 7789 return 
__builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7796 #define _mm_fmadd_round_ss(A, B, C, R) \ 7797 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7798 (__v4sf)(__m128)(B), \ 7799 (__v4sf)(__m128)(C), (__mmask8)-1, \ 7802 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ 7803 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7804 (__v4sf)(__m128)(A), \ 7805 (__v4sf)(__m128)(B), (__mmask8)(U), \ 7811 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7818 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ 7819 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7820 (__v4sf)(__m128)(B), \ 7821 (__v4sf)(__m128)(C), (__mmask8)(U), \ 7827 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7834 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ 7835 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 7836 (__v4sf)(__m128)(X), \ 7837 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7843 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7850 #define _mm_fmsub_round_ss(A, B, C, R) \ 7851 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7852 (__v4sf)(__m128)(B), \ 7853 -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7856 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \ 7857 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7858 (__v4sf)(__m128)(A), \ 7859 -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7865 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7872 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ 7873 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7874 (__v4sf)(__m128)(B), \ 7875 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 7881 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7888 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ 7889 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 7890 (__v4sf)(__m128)(X), \ 7891 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7897 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7904 #define _mm_fnmadd_round_ss(A, B, C, R) \ 7905 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7906 -(__v4sf)(__m128)(B), \ 7907 (__v4sf)(__m128)(C), (__mmask8)-1, \ 7910 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ 7911 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7912 -(__v4sf)(__m128)(A), \ 7913 (__v4sf)(__m128)(B), (__mmask8)(U), \ 7919 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7926 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ 7927 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7928 -(__v4sf)(__m128)(B), \ 7929 (__v4sf)(__m128)(C), (__mmask8)(U), \ 7935 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7942 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ 7943 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 7944 -(__v4sf)(__m128)(X), \ 7945 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7951 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7958 #define _mm_fnmsub_round_ss(A, B, C, R) \ 7959 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7960 -(__v4sf)(__m128)(B), \ 7961 -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7964 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ 7965 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7966 -(__v4sf)(__m128)(A), \ 7967 -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7973 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7980 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ 7981 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7982 -(__v4sf)(__m128)(B), \ 7983 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 7989 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7996 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ 7997 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 7998 -(__v4sf)(__m128)(X), \ 7999 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8005 return __builtin_ia32_vfmaddsd3_mask((
__v2df)__W, (__v2df)__A, (__v2df)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8012 #define _mm_fmadd_round_sd(A, B, C, R) \ 8013 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8014 (__v2df)(__m128d)(B), \ 8015 (__v2df)(__m128d)(C), (__mmask8)-1, \ 8018 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ 8019 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8020 (__v2df)(__m128d)(A), \ 8021 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8027 return __builtin_ia32_vfmaddsd3_maskz((
__v2df)__A, (__v2df)__B, (__v2df)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8034 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ 8035 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8036 (__v2df)(__m128d)(B), \ 8037 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8043 return __builtin_ia32_vfmaddsd3_mask3((
__v2df)__W, (__v2df)__X, (__v2df)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8050 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ 8051 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8052 (__v2df)(__m128d)(X), \ 8053 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8059 return __builtin_ia32_vfmaddsd3_mask((
__v2df)__W, (__v2df)__A, -(__v2df)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8066 #define _mm_fmsub_round_sd(A, B, C, R) \ 8067 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8068 (__v2df)(__m128d)(B), \ 8069 -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8072 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \ 8073 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8074 (__v2df)(__m128d)(A), \ 8075 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8081 return __builtin_ia32_vfmaddsd3_maskz((
__v2df)__A, (__v2df)__B, -(__v2df)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8088 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ 8089 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8090 (__v2df)(__m128d)(B), \ 8091 -(__v2df)(__m128d)(C), \ 8092 (__mmask8)(U), (int)(R)) 8097 return __builtin_ia32_vfmsubsd3_mask3((
__v2df)__W, (__v2df)__X, (__v2df)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8104 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ 8105 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8106 (__v2df)(__m128d)(X), \ 8107 (__v2df)(__m128d)(Y), \ 8108 (__mmask8)(U), (int)(R)) 8113 return __builtin_ia32_vfmaddsd3_mask((
__v2df)__W, -(__v2df)__A, (__v2df)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8120 #define _mm_fnmadd_round_sd(A, B, C, R) \ 8121 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8122 -(__v2df)(__m128d)(B), \ 8123 (__v2df)(__m128d)(C), (__mmask8)-1, \ 8126 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ 8127 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8128 -(__v2df)(__m128d)(A), \ 8129 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8135 return __builtin_ia32_vfmaddsd3_maskz((
__v2df)__A, -(__v2df)__B, (__v2df)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8142 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ 8143 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8144 -(__v2df)(__m128d)(B), \ 8145 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8151 return __builtin_ia32_vfmaddsd3_mask3((
__v2df)__W, -(__v2df)__X, (__v2df)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8158 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ 8159 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8160 -(__v2df)(__m128d)(X), \ 8161 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8167 return __builtin_ia32_vfmaddsd3_mask((
__v2df)__W, -(__v2df)__A, -(__v2df)__B, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8174 #define _mm_fnmsub_round_sd(A, B, C, R) \ 8175 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8176 -(__v2df)(__m128d)(B), \ 8177 -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8180 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ 8181 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8182 -(__v2df)(__m128d)(A), \ 8183 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8189 return __builtin_ia32_vfmaddsd3_maskz((
__v2df)__A, -(__v2df)__B, -(__v2df)__C, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8196 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ 8197 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8198 -(__v2df)(__m128d)(B), \ 8199 -(__v2df)(__m128d)(C), \ 8206 return __builtin_ia32_vfmsubsd3_mask3((
__v2df)__W, -(__v2df)__X, (__v2df)__Y, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
8213 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ 8214 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8215 -(__v2df)(__m128d)(X), \ 8216 (__v2df)(__m128d)(Y), \ 8217 (__mmask8)(U), (int)(R)) 8219 #define _mm512_permutex_pd(X, C) \ 8220 (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)) 8222 #define _mm512_mask_permutex_pd(W, U, X, C) \ 8223 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8224 (__v8df)_mm512_permutex_pd((X), (C)), \ 8225 (__v8df)(__m512d)(W)) 8227 #define _mm512_maskz_permutex_pd(U, X, C) \ 8228 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8229 (__v8df)_mm512_permutex_pd((X), (C)), \ 8230 (__v8df)_mm512_setzero_pd()) 8232 #define _mm512_permutex_epi64(X, C) \ 8233 (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)) 8235 #define _mm512_mask_permutex_epi64(W, U, X, C) \ 8236 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8237 (__v8di)_mm512_permutex_epi64((X), (C)), \ 8238 (__v8di)(__m512i)(W)) 8240 #define _mm512_maskz_permutex_epi64(U, X, C) \ 8241 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8242 (__v8di)_mm512_permutex_epi64((X), (C)), \ 8243 (__v8di)_mm512_setzero_si512()) 8248 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
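/* Usage sketch (illustrative; hypothetical helper name): _mm512_permutex_pd,
 * defined above, permutes the four doubles inside each 256-bit half by the
 * same immediate; 0x1B maps elements 0,1,2,3 to 3,2,1,0, reversing each
 * half independently. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
__demo_reverse_within_halves_pd(__m512d __v)
{
  return _mm512_permutex_pd(__v, 0x1B);
}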
8254 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8262 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8270 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8276 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8285 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8293 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
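/* Usage sketch (illustrative): the permvarsf512 builtin above implements
 * _mm512_permutexvar_ps, a variable permute across all 16 float lanes.
 * Reversing a vector, assuming the index vector is built with
 * _mm512_setr_epi32 (defined later in this header):
 *
 *   __m512i rev = _mm512_setr_epi32(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
 *   __m512  out = _mm512_permutexvar_ps(rev, v);
 */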
8299 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8307 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8315 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8318 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 8323 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8332 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8337 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 8342 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8348 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8354 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8360 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8366 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
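/* Usage sketch (illustrative): _mm512_kortestz above returns 1 when the OR
 * of two masks is all zeroes, giving a cheap "no lanes selected" test:
 *
 *   if (_mm512_kortestz(m0, m1))
 *     handle_empty();   // no bit set in either mask (hypothetical handler)
 */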
return (unsigned char)__builtin_ia32_kortestchi(__A, __B);

return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);

*__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8390 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8396 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8402 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
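/* Usage sketch (illustrative): the mask operations above combine __mmask16
 * values without touching vector registers, e.g.:
 *
 *   __mmask16 both    = _mm512_kand(m0, m1);   // lanes set in both
 *   __mmask16 onlyone = _mm512_kxor(m0, m1);   // lanes set in exactly one
 */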
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

#define _kshiftli_mask16(A, I) \
  (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))

#define _kshiftri_mask16(A, I) \
  (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))

return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8425 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8430 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
*(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8442 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8456 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8463 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
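/* Usage sketch (assumption: the nontemporal stores above back the standard
 * _mm512_stream_pd / _mm512_stream_ps / _mm512_stream_si512 intrinsics; the
 * destination must be 64-byte aligned):
 *
 *   _Alignas(64) float buf[16];
 *   _mm512_stream_ps(buf, _mm512_setzero_ps());
 */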
8469 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8477 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8486 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8494 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8503 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8511 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8520 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8528 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8534 #define _mm_cmp_round_ss_mask(X, Y, P, R) \ 8535 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8536 (__v4sf)(__m128)(Y), (int)(P), \ 8537 (__mmask8)-1, (int)(R)) 8539 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ 8540 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8541 (__v4sf)(__m128)(Y), (int)(P), \ 8542 (__mmask8)(M), (int)(R)) 8544 #define _mm_cmp_ss_mask(X, Y, P) \ 8545 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8546 (__v4sf)(__m128)(Y), (int)(P), \ 8548 _MM_FROUND_CUR_DIRECTION) 8550 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \ 8551 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8552 (__v4sf)(__m128)(Y), (int)(P), \ 8554 _MM_FROUND_CUR_DIRECTION) 8556 #define _mm_cmp_round_sd_mask(X, Y, P, R) \ 8557 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8558 (__v2df)(__m128d)(Y), (int)(P), \ 8559 (__mmask8)-1, (int)(R)) 8561 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 8562 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8563 (__v2df)(__m128d)(Y), (int)(P), \ 8564 (__mmask8)(M), (int)(R)) 8566 #define _mm_cmp_sd_mask(X, Y, P) \ 8567 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8568 (__v2df)(__m128d)(Y), (int)(P), \ 8570 _MM_FROUND_CUR_DIRECTION) 8572 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ 8573 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8574 (__v2df)(__m128d)(Y), (int)(P), \ 8576 _MM_FROUND_CUR_DIRECTION) 8639 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8640 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8646 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8654 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8662 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8663 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
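/* Usage sketch (illustrative): the two shufflevector patterns above are the
 * 512-bit duplications of odd elements (1,1,3,3,... = _mm512_movehdup_ps)
 * and even elements (0,0,2,2,... = _mm512_moveldup_ps):
 *
 *   __m512 even_dup = _mm512_moveldup_ps(v);
 *   __m512 odd_dup  = _mm512_movehdup_ps(v);
 */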
8669 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8677 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8685 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), __W);
8691 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), _mm_setzero_ps());
8698 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), __W);
8704 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), _mm_setzero_pd());
8711 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
__builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
__m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                              (__v4sf) _mm_setzero_ps (),
                                              0, 4, 4, 4);
return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
                                              (__v4sf) _mm_setzero_ps (),
                                              __U & 1);
return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
                                                (__v2df) _mm_setzero_pd (),
                                                __U & 1);
8756 #define _mm512_shuffle_epi32(A, I) \ 8757 (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)) 8759 #define _mm512_mask_shuffle_epi32(W, U, A, I) \ 8760 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 8761 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8762 (__v16si)(__m512i)(W)) 8764 #define _mm512_maskz_shuffle_epi32(U, A, I) \ 8765 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 8766 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8767 (__v16si)_mm512_setzero_si512()) 8772 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8780 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8788 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8796 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
                  (__v8df) __W, (__mmask8) __U);

return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
                  (__v8df) _mm512_setzero_pd(), (__mmask8) __U);

return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
                  (__v8di) __W, (__mmask8) __U);

return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
                  (__v8di) _mm512_setzero_si512(), (__mmask8) __U);

return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                  (__v16sf) __W, (__mmask16) __U);

return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                  (__v16sf) _mm512_setzero_ps(), (__mmask16) __U);

return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
                  (__v16si) __W, (__mmask16) __U);

return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
                  (__v16si) _mm512_setzero_si512(), (__mmask16) __U);
8868 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8876 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8884 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8892 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8897 #define _mm512_cvt_roundps_pd(A, R) \ 8898 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8899 (__v8df)_mm512_undefined_pd(), \ 8900 (__mmask8)-1, (int)(R)) 8902 #define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ 8903 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8904 (__v8df)(__m512d)(W), \ 8905 (__mmask8)(U), (int)(R)) 8907 #define _mm512_maskz_cvt_roundps_pd(U, A, R) \ 8908 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8909 (__v8df)_mm512_setzero_pd(), \ 8910 (__mmask8)(U), (int)(R)) 8915 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
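/* Usage sketch (illustrative): the convertvector above widens eight floats
 * to eight doubles (_mm512_cvtps_pd); the _mm512_cvt_roundps_pd macro form
 * defined before it also takes an exception-control operand:
 *
 *   __m512d wide = _mm512_cvt_roundps_pd(lo8, _MM_FROUND_CUR_DIRECTION);
 *
 * where lo8 is a hypothetical __m256 holding the eight source floats. */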
8921 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8929 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8949 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8957 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8965 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8973 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8981 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8988 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8995 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9002 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
#define _mm_cvt_roundsd_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R))

return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, (__v2df)__B,
                                           (__v4sf)__W, (__mmask8)__U,
                                           _MM_FROUND_CUR_DIRECTION);
return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, (__v2df)__B,
                                           (__v4sf)_mm_setzero_ps (),
                                           (__mmask8)__U,
                                           _MM_FROUND_CUR_DIRECTION);
9042 #define _mm_cvtss_i32 _mm_cvtss_si32 9043 #define _mm_cvtsd_i32 _mm_cvtsd_si32 9044 #define _mm_cvti32_sd _mm_cvtsi32_sd 9045 #define _mm_cvti32_ss _mm_cvtsi32_ss 9047 #define _mm_cvtss_i64 _mm_cvtss_si64 9048 #define _mm_cvtsd_i64 _mm_cvtsd_si64 9049 #define _mm_cvti64_sd _mm_cvtsi64_sd 9050 #define _mm_cvti64_ss _mm_cvtsi64_ss 9054 #define _mm_cvt_roundi64_sd(A, B, R) \ 9055 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9058 #define _mm_cvt_roundsi64_sd(A, B, R) \ 9059 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9063 #define _mm_cvt_roundsi32_ss(A, B, R) \ 9064 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) 9066 #define _mm_cvt_roundi32_ss(A, B, R) \ 9067 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) 9070 #define _mm_cvt_roundsi64_ss(A, B, R) \ 9071 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9074 #define _mm_cvt_roundi64_ss(A, B, R) \ 9075 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9079 #define _mm_cvt_roundss_sd(A, B, R) \ 9080 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9081 (__v4sf)(__m128)(B), \ 9082 (__v2df)_mm_undefined_pd(), \ 9083 (__mmask8)-1, (int)(R)) 9085 #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ 9086 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9087 (__v4sf)(__m128)(B), \ 9088 (__v2df)(__m128d)(W), \ 9089 (__mmask8)(U), (int)(R)) 9091 #define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ 9092 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9093 (__v4sf)(__m128)(B), \ 9094 (__v2df)_mm_setzero_pd(), \ 9095 (__mmask8)(U), (int)(R)) 9100 return __builtin_ia32_cvtss2sd_round_mask((
__v2df)__A, (__v4sf)__B, (__v2df)__W,
                                          (__mmask8)__U,
                                          _MM_FROUND_CUR_DIRECTION);
return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, (__v4sf)__B,
                                          (__v2df)_mm_setzero_pd (),
                                          (__mmask8)__U,
                                          _MM_FROUND_CUR_DIRECTION);
#define _mm_cvt_roundu64_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
                                               _MM_FROUND_CUR_DIRECTION);
}
#define _mm_cvt_roundu32_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R))

#define _mm_cvt_roundu64_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
                                              _MM_FROUND_CUR_DIRECTION);
}
return (__m512i) __builtin_ia32_selectd_512(__M,
                                            (__v16si)_mm512_set1_epi32(__A),
                                            (__v16si)__O);
return (__m512i) __builtin_ia32_selectq_512(__M,
                                            (__v8di)_mm512_set1_epi64(__A),
                                            (__v8di)__O);
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
   {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
    __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
    __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
    __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
    __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
    __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
    __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
    __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
   {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
    __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
    __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
    __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
                  int __E, int __F, int __G, int __H,
                  int __I, int __J, int __K, int __L,
                  int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
   { __P, __O, __N, __M, __L, __K, __J, __I,
     __H, __G, __F, __E, __D, __C, __B, __A };
}
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,                           \
                          e8,e9,e10,e11,e12,e13,e14,e15)                     \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi64 (long long __A, long long __B, long long __C, long long __D,
                  long long __E, long long __F, long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
   { __H, __G, __F, __E, __D, __C, __B, __A };
}
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_set_pd (double __A, double __B, double __C, double __D,
               double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
   { __H, __G, __F, __E, __D, __C, __B, __A };
}
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_set_ps (float __A, float __B, float __C, float __D,
               float __E, float __F, float __G, float __H,
               float __I, float __J, float __K, float __L,
               float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
   { __P, __O, __N, __M, __L, __K, __J, __I,
     __H, __G, __F, __E, __D, __C, __B, __A };
}
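/* Note on argument order: _mm512_set_ps above takes arguments from the
 * highest element down, so its first argument lands in element 15. The
 * _mm512_setr_* macros that follow reverse this into memory order:
 *
 *   __m512 v = _mm512_setr_ps(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
 *   // element 0 of v is 0.0f, element 15 is 15.0f
 */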
9264 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 9265 _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ 9266 (e4),(e3),(e2),(e1),(e0)) 9302 #define _mm512_mask_reduce_operator(op) \ 9303 __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \ 9304 __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \ 9305 __m256i __t3 = (__m256i)(__t1 op __t2); \ 9306 __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \ 9307 __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \ 9308 __v2du __t6 = __t4 op __t5; \ 9309 __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9310 __v2du __t8 = __t6 op __t7; \ 9352 #undef _mm512_mask_reduce_operator 9354 #define _mm512_mask_reduce_operator(op) \ 9355 __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \ 9356 __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \ 9357 __m256d __t3 = __t1 op __t2; \ 9358 __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ 9359 __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ 9360 __m128d __t6 = __t4 op __t5; \ 9361 __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9362 __m128d __t8 = __t6 op __t7; \ 9384 #undef _mm512_mask_reduce_operator 9386 #define _mm512_mask_reduce_operator(op) \ 9387 __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \ 9388 __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \ 9389 __m256i __t3 = (__m256i)(__t1 op __t2); \ 9390 __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \ 9391 __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \ 9392 __v4su __t6 = __t4 op __t5; \ 9393 __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9394 __v4su __t8 = __t6 op __t7; \ 9395 __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9396 __v4su __t10 = __t8 op __t9; \ 9442 #undef _mm512_mask_reduce_operator 9444 #define _mm512_mask_reduce_operator(op) \ 9445 __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \ 9446 __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \ 9447 __m256 __t3 = __t1 op __t2; \ 9448 __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ 9449 __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ 9450 __m128 __t6 = __t4 op __t5; \ 9451 __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9452 __m128 __t8 = __t6 op __t7; \ 9453 __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9454 __m128 __t10 = __t8 op __t9; \ 9478 #undef _mm512_mask_reduce_operator 9480 #define _mm512_mask_reduce_operator(op) \ 9481 __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \ 9482 __m512i __t2 = _mm512_##op(__V, __t1); \ 9483 __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \ 9484 __m512i __t4 = _mm512_##op(__t2, __t3); \ 9485 __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \ 9486 __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ 9532 #undef _mm512_mask_reduce_operator 9534 #define _mm512_mask_reduce_operator(op) \ 9535 __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \ 9536 __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \ 9537 __m256i __t3 = _mm256_##op(__t1, __t2); \ 9538 __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \ 9539 __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \ 9540 __m128i __t6 = _mm_##op(__t4, __t5); \ 9541 __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \ 9542 __m128i __t8 = _mm_##op(__t6, __t7); \ 9543 __m128i __t9 = 
(__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
  __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \

#undef _mm512_mask_reduce_operator

#define _mm512_mask_reduce_operator(op) \
  __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
  __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
  __m256d __t3 = _mm256_##op(__t1, __t2); \
  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
  __m128d __t6 = _mm_##op(__t4, __t5); \
  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
  __m128d __t8 = _mm_##op(__t6, __t7); \

#undef _mm512_mask_reduce_operator

#define _mm512_mask_reduce_operator(op) \
  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
  __m256 __t3 = _mm256_##op(__t1, __t2); \
  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
  __m128 __t6 = _mm_##op(__t4, __t5); \
  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __m128 __t8 = _mm_##op(__t6, __t7); \
  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __m128 __t10 = _mm_##op(__t8, __t9); \

#undef _mm512_mask_reduce_operator

#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS
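/* Usage sketch (illustrative): the _mm512_mask_reduce_operator helper
 * macros above generate the horizontal reductions this header exposes,
 * e.g. (assuming AVX-512F is enabled):
 *
 *   long long total = _mm512_reduce_add_epi64(v);  // sum of 8 lanes
 *   float     top   = _mm512_reduce_max_ps(x);     // max of 16 lanes
 */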
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
struct __storeu_i16 *__P __v
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
#define __DEFAULT_FN_ATTRS512
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
#define _mm512_cmpneq_epi32_mask(A, B) _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
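mask_blend picks per lane between its two vector operands: lanes whose mask bit is set take the second operand, the rest take the first. Combined with a compare mask this gives arbitrary select patterns, e.g. a per-lane maximum (illustration only; _mm512_max_epi32 below does this directly):

#include <immintrin.h>
__m512i max_blend(__m512i a, __m512i b)
{
    __mmask16 k = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT); /* a < b */
    return _mm512_mask_blend_epi32(k, a, b);   /* bit set -> take b */
}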
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
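The reduce helpers collapse one vector to a scalar, so a whole-array reduction is a plain vector loop plus a single reduce at the end. A sketch assuming n is a multiple of 8 (helper name is illustrative):

#include <immintrin.h>
double sum_array(const double *p, int n)   /* n % 8 == 0 for brevity */
{
    __m512d acc = _mm512_setzero_pd();
    for (int i = 0; i < n; i += 8)
        acc = _mm512_add_pd(acc, _mm512_loadu_pd(p + i));
    return _mm512_reduce_add_pd(acc);
}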
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
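The k* helpers treat mask registers like 16-bit integers; _mm512_kortestz returns 1 exactly when the OR of its two operands is all zero, which makes "did any lane match" tests cheap. For example (a sketch):

#include <immintrin.h>
int any_lane_differs(__m512i a, __m512i b)
{
    __mmask16 k = _mm512_cmpneq_epi32_mask(a, b);
    /* kortestz(k, k) == 1 iff k is all zero, i.e. no lane differs */
    return !_mm512_kortestz(k, k);
}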
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
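Masked unaligned loads and stores are the standard way to handle a loop tail without a scalar epilogue: build a mask covering the n remaining elements, then load, compute, and store under it. A sketch assuming 0 <= n < 16 (helper name is illustrative):

#include <immintrin.h>
/* Scale the trailing n floats of p in place. */
void scale_tail(float *p, int n, float s)
{
    __mmask16 k = (__mmask16)((1u << n) - 1u);   /* low n lanes active */
    __m512 v = _mm512_maskz_loadu_ps(k, p);      /* inactive lanes read as 0, never fault */
    v = _mm512_mul_ps(v, _mm512_set1_ps(s));
    _mm512_mask_storeu_ps(p, k, v);              /* writes only the n active lanes */
}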
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the parameter; the upper 384 bits are set to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper double-precision value of the first operand.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float]. The lower 32 bits are set to the lower 32 bits of the second operand; the upper 96 bits are copied from the first operand.
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void)
#define _mm512_cmpeq_epi64_mask(A, B) _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
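The conversion names distinguish rounding: cvt uses the current rounding mode (round-to-nearest-even by default), while cvtt truncates toward zero like a C cast. A sketch of the pair (helper name is illustrative):

#include <immintrin.h>
void convert_both(__m512 v, __m512i *rounded, __m512i *truncated)
{
    *rounded   = _mm512_cvtps_epi32(v);    /* current rounding mode */
    *truncated = _mm512_cvttps_epi32(v);   /* truncate toward zero */
}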
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
#define _mm512_mask_reduce_operator(op)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
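In the FMA family the name encodes the signs: fmadd computes a*b+c, fmsub a*b-c, fnmadd -(a*b)+c, fnmsub -(a*b)-c, and fmaddsub/fmsubadd alternate add and subtract across even and odd lanes; all fuse the multiply and add with a single rounding. A one-line sketch (helper name is illustrative):

#include <immintrin.h>
/* acc += x*y, eight doubles at a time. */
__m512d axpy_step(__m512d acc, __m512d x, __m512d y)
{
    return _mm512_fmadd_pd(x, y, acc);
}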
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of the second operand and returns the quotient in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper double-precision value of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4 x double]. The lower 256 bits contain the value of the parameter; the upper 256 bits are set to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, int __B)
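The immediate-shift forms combine naturally with the epi32 logic ops for per-lane bit manipulation, e.g. extracting a bit-field from every lane (a sketch; helper name is illustrative, width assumed < 32):

#include <immintrin.h>
/* Extract bits [lo, lo+width) of every 32-bit lane. */
__m512i bitfield(__m512i v, int lo, int width)
{
    __m512i x = _mm512_srli_epi32(v, lo);
    __m512i m = _mm512_set1_epi32((int)((1u << width) - 1u));
    return _mm512_and_epi32(x, m);
}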
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(__m512i *__P, __m512i __A)
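The stream intrinsics carry a non-temporal hint: the stores bypass the cache, require 64-byte-aligned addresses, and are normally followed by _mm_sfence before the data is consumed elsewhere. A sketch assuming dst is 64-byte aligned (helper name is illustrative):

#include <immintrin.h>
#include <stddef.h>
/* Fill a large buffer without polluting the cache. */
void fill_nt(__m512i *dst, __m512i value, size_t n)
{
    for (size_t i = 0; i < n; ++i)
        _mm512_stream_si512(dst + i, value);
    _mm_sfence();   /* order the non-temporal stores before later reads */
}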
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the second operand and returns the quotient in the low-order 32 bits of the result. The upper 96 bits of the result are copied from the first operand.
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(double *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper double-precision value of the first operand.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision value of the first operand and returns the difference in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper double-precision value of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
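The zext forms differ from the plain casts in one guarantee worth spelling out: _mm512_castsi256_si512 leaves bits [511:256] of the result undefined, while _mm512_zextsi256_si512 sets them to zero. A sketch, assuming an AVX-512F target:

#include <immintrin.h>

// Widening a 256-bit vector to 512 bits.
static __m512i widen_zeroed(__m256i __v)
{
  return _mm512_zextsi256_si512(__v);   // bits [511:256] are guaranteed zero
}
static __m512i widen_unspecified(__m256i __v)
{
  return _mm512_castsi256_si512(__v);   // bits [511:256] must not be relied on
}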
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
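The undefined_* intrinsics exist to avoid a needless zeroing instruction when the unspecified lanes are never read. A sketch, assuming an AVX-512F target; passing an undefined vector as the pass-through operand is only safe if the caller ignores the unselected lanes:

#include <immintrin.h>

// Hypothetical helper: converts only the masked lanes; the rest are unspecified.
static __m512i convert_selected(__m512 __x, __mmask16 __m)
{
  return _mm512_mask_cvtps_epu32(_mm512_undefined_epi32(), __m, __x);
}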
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float]. The lower 128 bits contain the value of the source vector; the upper 384 bits are set to zero.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double]. The lower 64 bits are set to the lower 64 bits of the second operand; the upper 64 bits are set to the upper 64 bits of the first operand.
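A sketch of the resulting lane layout, assuming an SSE2 target (the helper name and values are illustrative):

#include <immintrin.h>

// _mm_move_sd(a, b) yields { b[0], a[1] }:
// a = { 1.0, 2.0 }, b = { 9.0, 8.0 }  ->  { 9.0, 2.0 }
static __m128d replace_low(__m128d __a, __m128d __b)
{
  return _mm_move_sd(__a, __b);
}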
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
typedef long long __v2di __attribute__((__vector_size__(16)));
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W)
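The reduce_* helpers fold all lanes of a vector into one scalar. A minimal sketch, assuming an AVX-512F target (the helper name is illustrative):

#include <immintrin.h>

// Horizontal sum of 16 ints loaded from an unaligned pointer.
static int sum16(const int *__p)
{
  __m512i __v = _mm512_loadu_si512(__p);
  return _mm512_reduce_add_epi32(__v);
}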
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
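These compare macros return a bitmask rather than a vector, and the mask_ forms AND the comparison result with the incoming mask. A sketch, assuming an AVX-512F target and a Clang/GCC popcount builtin (the helper name is illustrative):

#include <immintrin.h>

// Counts the lanes of __a and __b that compare equal.
static int count_equal(__m512i __a, __m512i __b)
{
  __mmask16 __eq = _mm512_mask_cmpeq_epi32_mask(0xFFFF, __a, __b);
  return __builtin_popcount((unsigned)__eq);
}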
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
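One pitfall worth noting: the set_* intrinsics take their arguments from the highest element down, so the last argument lands in lane 0. A sketch, assuming an AVX-512F target (the helper name is illustrative):

#include <immintrin.h>

// Lane i of the result holds the value i.
static __m512i iota16(void)
{
  return _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8,
                          7, 6, 5, 4, 3, 2, 1, 0);
}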
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
#define _mm512_cmpneq_epi64_mask(A, B) _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(float *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
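The ss/sd scalar forms operate on element 0 only and copy the remaining lanes from the first operand. A sketch, assuming an SSE target (the helper name is illustrative):

#include <immintrin.h>

// Result is { a[0] + b[0], a[1], a[2], a[3] }.
static __m128 add_low(__m128 __a, __m128 __b)
{
  return _mm_add_ss(__a, __b);
}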
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
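The store/storeu pair differs only in the alignment contract: the aligned form requires a 64-byte-aligned address and may fault otherwise, while the unaligned form accepts any address. A sketch, assuming an AVX-512F target (the pointer names are illustrative):

#include <immintrin.h>

// aligned64 must be 64-byte aligned; anywhere may be arbitrarily aligned.
static void spill(__m512i __v, void *aligned64, void *anywhere)
{
  _mm512_store_si512(aligned64, __v);
  _mm512_storeu_si512(anywhere, __v);
}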
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b)