24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 27 #ifndef __AVX512FINTRIN_H 28 #define __AVX512FINTRIN_H 34 typedef long long __v8di
__attribute__((__vector_size__(64)));
38 typedef unsigned char __v64qu
__attribute__((__vector_size__(64)));
39 typedef unsigned short __v32hu
__attribute__((__vector_size__(64)));
40 typedef unsigned long long __v8du
__attribute__((__vector_size__(64)));
41 typedef unsigned int __v16su
__attribute__((__vector_size__(64)));
45 typedef long long __m512i
__attribute__((__vector_size__(64)));
51 #define _MM_FROUND_TO_NEAREST_INT 0x00 52 #define _MM_FROUND_TO_NEG_INF 0x01 53 #define _MM_FROUND_TO_POS_INF 0x02 54 #define _MM_FROUND_TO_ZERO 0x03 55 #define _MM_FROUND_CUR_DIRECTION 0x04 65 #define _MM_CMPINT_GE _MM_CMPINT_NLT 67 #define _MM_CMPINT_GT _MM_CMPINT_NLE 176 #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512))) 177 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128))) 178 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 185 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
188 #define _mm512_setzero_epi32 _mm512_setzero_si512 193 return (__m512d)__builtin_ia32_undef512();
199 return (__m512)__builtin_ia32_undef512();
205 return (__m512)__builtin_ia32_undef512();
211 return (__m512i)__builtin_ia32_undef512();
217 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
218 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
224 return (__m512i)__builtin_ia32_selectd_512(__M,
232 return (__m512i)__builtin_ia32_selectd_512(__M,
240 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
241 0, 0, 0, 0, 0, 0, 0, 0);
247 return (__m512i)__builtin_ia32_selectq_512(__M,
256 return (__m512i)__builtin_ia32_selectq_512(__M,
265 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
266 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
269 #define _mm512_setzero _mm512_setzero_ps 274 return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
280 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
281 __w, __w, __w, __w, __w, __w, __w, __w };
287 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
293 return __extension__ (__m512i)(__v64qi){
294 __w, __w, __w, __w, __w, __w, __w, __w,
295 __w, __w, __w, __w, __w, __w, __w, __w,
296 __w, __w, __w, __w, __w, __w, __w, __w,
297 __w, __w, __w, __w, __w, __w, __w, __w,
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w,
301 __w, __w, __w, __w, __w, __w, __w, __w };
307 return __extension__ (__m512i)(__v32hi){
308 __w, __w, __w, __w, __w, __w, __w, __w,
309 __w, __w, __w, __w, __w, __w, __w, __w,
310 __w, __w, __w, __w, __w, __w, __w, __w,
311 __w, __w, __w, __w, __w, __w, __w, __w };
317 return __extension__ (__m512i)(__v16si){
318 __s, __s, __s, __s, __s, __s, __s, __s,
319 __s, __s, __s, __s, __s, __s, __s, __s };
325 return (__m512i)__builtin_ia32_selectd_512(__M,
333 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
339 return (__m512i)__builtin_ia32_selectq_512(__M,
347 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
348 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
354 return __extension__ (__m512i)(__v16si)
355 {
__D, __C, __B, __A,
__D, __C, __B, __A,
356 __D, __C, __B, __A,
__D, __C, __B, __A };
363 return __extension__ (__m512i) (__v8di)
364 {
__D, __C, __B, __A,
__D, __C, __B, __A };
370 return __extension__ (__m512d)
371 {
__D, __C, __B, __A,
__D, __C, __B, __A };
377 return __extension__ (__m512)
378 {
__D, __C, __B, __A,
__D, __C, __B, __A,
379 __D, __C, __B, __A,
__D, __C, __B, __A };
382 #define _mm512_setr4_epi32(e0,e1,e2,e3) \ 383 _mm512_set4_epi32((e3),(e2),(e1),(e0)) 385 #define _mm512_setr4_epi64(e0,e1,e2,e3) \ 386 _mm512_set4_epi64((e3),(e2),(e1),(e0)) 388 #define _mm512_setr4_pd(e0,e1,e2,e3) \ 389 _mm512_set4_pd((e3),(e2),(e1),(e0)) 391 #define _mm512_setr4_ps(e0,e1,e2,e3) \ 392 _mm512_set4_ps((e3),(e2),(e1),(e0)) 397 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
398 0, 0, 0, 0, 0, 0, 0, 0);
406 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
412 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
413 -1, -1, -1, -1, -1, -1, -1, -1);
419 return __builtin_shufflevector(__a, __a, 0, 1);
425 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
431 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
437 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
443 return (__m512) (__A);
449 return (__m512i) (__A);
455 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
461 return (__m512d) (__A);
467 return (__m512i) (__A);
473 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
479 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
485 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
491 return (__m512) (__A);
497 return (__m512d) (__A);
503 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
509 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
515 return (__mmask16)
__a;
540 return __builtin_shufflevector((__v2df)__a, (__v2df)
_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
559 return __builtin_shufflevector((__v4df)__a, (__v4df)
_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
577 return __builtin_shufflevector((__v4sf)__a, (__v4sf)
_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
595 return __builtin_shufflevector((__v8sf)__a, (__v8sf)
_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
613 return __builtin_shufflevector((__v2di)__a, (__v2di)
_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
631 return __builtin_shufflevector((__v4di)__a, (__v4di)
_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
638 return (__m512i)((__v16su)__a & (__v16su)
__b);
644 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
659 return (__m512i)((__v8du)__a & (__v8du)
__b);
665 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
680 return (__m512i)(~(__v8du)__A & (__v8du)__B);
686 return (__m512i)(~(__v16su)__A & (__v16su)__B);
692 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
707 return (__m512i)(~(__v8du)__A & (__v8du)__B);
713 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
728 return (__m512i)((__v16su)__a | (__v16su)
__b);
734 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
748 return (__m512i)((__v8du)__a | (__v8du)
__b);
754 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
768 return (__m512i)((__v16su)__a ^ (__v16su)
__b);
774 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
788 return (__m512i)((__v8du)__a ^ (__v8du)
__b);
794 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
808 return (__m512i)((__v8du)__a & (__v8du)
__b);
814 return (__m512i)((__v8du)__a | (__v8du)
__b);
820 return (__m512i)((__v8du)__a ^ (__v8du)
__b);
828 return (__m512d)((__v8df)__a + (__v8df)
__b);
834 return (__m512)((__v16sf)__a + (__v16sf)
__b);
840 return (__m512d)((__v8df)__a * (__v8df)
__b);
846 return (__m512)((__v16sf)__a * (__v16sf)
__b);
852 return (__m512d)((__v8df)__a - (__v8df)
__b);
858 return (__m512)((__v16sf)__a - (__v16sf)
__b);
864 return (__m512i) ((__v8du) __A + (__v8du) __B);
870 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
878 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
886 return (__m512i) ((__v8du) __A - (__v8du) __B);
892 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
900 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
908 return (__m512i) ((__v16su) __A + (__v16su) __B);
914 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
922 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
930 return (__m512i) ((__v16su) __A - (__v16su) __B);
936 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
944 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
949 #define _mm512_max_round_pd(A, B, R) \ 950 (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \ 951 (__v8df)(__m512d)(B), (int)(R)) 953 #define _mm512_mask_max_round_pd(W, U, A, B, R) \ 954 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 955 (__v8df)_mm512_max_round_pd((A), (B), (R)), \ 958 #define _mm512_maskz_max_round_pd(U, A, B, R) \ 959 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 960 (__v8df)_mm512_max_round_pd((A), (B), (R)), \ 961 (__v8df)_mm512_setzero_pd()) 966 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
973 return (__m512d)__builtin_ia32_selectpd_512(__U,
981 return (__m512d)__builtin_ia32_selectpd_512(__U,
986 #define _mm512_max_round_ps(A, B, R) \ 987 (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \ 988 (__v16sf)(__m512)(B), (int)(R)) 990 #define _mm512_mask_max_round_ps(W, U, A, B, R) \ 991 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 992 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ 995 #define _mm512_maskz_max_round_ps(U, A, B, R) \ 996 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 997 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ 998 (__v16sf)_mm512_setzero_ps()) 1003 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1010 return (__m512)__builtin_ia32_selectps_512(__U,
1018 return (__m512)__builtin_ia32_selectps_512(__U,
1025 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1034 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1041 #define _mm_max_round_ss(A, B, R) \ 1042 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1043 (__v4sf)(__m128)(B), \ 1044 (__v4sf)_mm_setzero_ps(), \ 1045 (__mmask8)-1, (int)(R)) 1047 #define _mm_mask_max_round_ss(W, U, A, B, R) \ 1048 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1049 (__v4sf)(__m128)(B), \ 1050 (__v4sf)(__m128)(W), (__mmask8)(U), \ 1053 #define _mm_maskz_max_round_ss(U, A, B, R) \ 1054 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \ 1055 (__v4sf)(__m128)(B), \ 1056 (__v4sf)_mm_setzero_ps(), \ 1057 (__mmask8)(U), (int)(R)) 1061 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1070 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1077 #define _mm_max_round_sd(A, B, R) \ 1078 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1079 (__v2df)(__m128d)(B), \ 1080 (__v2df)_mm_setzero_pd(), \ 1081 (__mmask8)-1, (int)(R)) 1083 #define _mm_mask_max_round_sd(W, U, A, B, R) \ 1084 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1085 (__v2df)(__m128d)(B), \ 1086 (__v2df)(__m128d)(W), \ 1087 (__mmask8)(U), (int)(R)) 1089 #define _mm_maskz_max_round_sd(U, A, B, R) \ 1090 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \ 1091 (__v2df)(__m128d)(B), \ 1092 (__v2df)_mm_setzero_pd(), \ 1093 (__mmask8)(U), (int)(R)) 1095 static __inline __m512i
1099 return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1105 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1113 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1121 return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1127 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1135 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1143 return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1149 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1157 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1165 return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1171 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1179 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1184 #define _mm512_min_round_pd(A, B, R) \ 1185 (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \ 1186 (__v8df)(__m512d)(B), (int)(R)) 1188 #define _mm512_mask_min_round_pd(W, U, A, B, R) \ 1189 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1190 (__v8df)_mm512_min_round_pd((A), (B), (R)), \ 1193 #define _mm512_maskz_min_round_pd(U, A, B, R) \ 1194 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1195 (__v8df)_mm512_min_round_pd((A), (B), (R)), \ 1196 (__v8df)_mm512_setzero_pd()) 1201 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1208 return (__m512d)__builtin_ia32_selectpd_512(__U,
1216 return (__m512d)__builtin_ia32_selectpd_512(__U,
1221 #define _mm512_min_round_ps(A, B, R) \ 1222 (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \ 1223 (__v16sf)(__m512)(B), (int)(R)) 1225 #define _mm512_mask_min_round_ps(W, U, A, B, R) \ 1226 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1227 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ 1230 #define _mm512_maskz_min_round_ps(U, A, B, R) \ 1231 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1232 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ 1233 (__v16sf)_mm512_setzero_ps()) 1238 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1245 return (__m512)__builtin_ia32_selectps_512(__U,
1253 return (__m512)__builtin_ia32_selectps_512(__U,
1260 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1269 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1276 #define _mm_min_round_ss(A, B, R) \ 1277 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1278 (__v4sf)(__m128)(B), \ 1279 (__v4sf)_mm_setzero_ps(), \ 1280 (__mmask8)-1, (int)(R)) 1282 #define _mm_mask_min_round_ss(W, U, A, B, R) \ 1283 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1284 (__v4sf)(__m128)(B), \ 1285 (__v4sf)(__m128)(W), (__mmask8)(U), \ 1288 #define _mm_maskz_min_round_ss(U, A, B, R) \ 1289 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \ 1290 (__v4sf)(__m128)(B), \ 1291 (__v4sf)_mm_setzero_ps(), \ 1292 (__mmask8)(U), (int)(R)) 1296 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1305 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1312 #define _mm_min_round_sd(A, B, R) \ 1313 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1314 (__v2df)(__m128d)(B), \ 1315 (__v2df)_mm_setzero_pd(), \ 1316 (__mmask8)-1, (int)(R)) 1318 #define _mm_mask_min_round_sd(W, U, A, B, R) \ 1319 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1320 (__v2df)(__m128d)(B), \ 1321 (__v2df)(__m128d)(W), \ 1322 (__mmask8)(U), (int)(R)) 1324 #define _mm_maskz_min_round_sd(U, A, B, R) \ 1325 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \ 1326 (__v2df)(__m128d)(B), \ 1327 (__v2df)_mm_setzero_pd(), \ 1328 (__mmask8)(U), (int)(R)) 1330 static __inline __m512i
1334 return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1340 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1348 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1356 return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1362 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1370 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1378 return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1384 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1392 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1400 return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1406 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1414 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1422 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1428 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1436 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1444 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1450 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1458 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1466 return (__m512i) ((__v16su) __A * (__v16su) __B);
1472 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1480 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1487 return (__m512i) ((__v8du) __A * (__v8du) __B);
1492 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1497 #define _mm512_sqrt_round_pd(A, R) \ 1498 (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)) 1500 #define _mm512_mask_sqrt_round_pd(W, U, A, R) \ 1501 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1502 (__v8df)_mm512_sqrt_round_pd((A), (R)), \ 1503 (__v8df)(__m512d)(W)) 1505 #define _mm512_maskz_sqrt_round_pd(U, A, R) \ 1506 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1507 (__v8df)_mm512_sqrt_round_pd((A), (R)), \ 1508 (__v8df)_mm512_setzero_pd()) 1513 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1520 return (__m512d)__builtin_ia32_selectpd_512(__U,
1528 return (__m512d)__builtin_ia32_selectpd_512(__U,
1533 #define _mm512_sqrt_round_ps(A, R) \ 1534 (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)) 1536 #define _mm512_mask_sqrt_round_ps(W, U, A, R) \ 1537 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1538 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ 1539 (__v16sf)(__m512)(W)) 1541 #define _mm512_maskz_sqrt_round_ps(U, A, R) \ 1542 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 1543 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ 1544 (__v16sf)_mm512_setzero_ps()) 1549 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1556 return (__m512)__builtin_ia32_selectps_512(__U,
1564 return (__m512)__builtin_ia32_selectps_512(__U,
1572 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1580 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1588 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1597 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1606 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1614 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1623 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1633 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1642 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1651 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1661 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1670 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1679 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1688 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1696 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1705 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1714 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1722 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1731 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1741 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1750 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1759 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1769 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1778 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1787 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1796 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1805 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1814 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1823 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1832 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1841 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1850 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1859 return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1865 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1873 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1881 return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1887 return (__m512i)__builtin_ia32_selectd_512(__U,
1895 return (__m512i)__builtin_ia32_selectd_512(__U,
1903 return __builtin_ia32_selectss_128(__U, __A, __W);
1912 #define _mm_add_round_ss(A, B, R) \ 1913 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1914 (__v4sf)(__m128)(B), \ 1915 (__v4sf)_mm_setzero_ps(), \ 1916 (__mmask8)-1, (int)(R)) 1918 #define _mm_mask_add_round_ss(W, U, A, B, R) \ 1919 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1920 (__v4sf)(__m128)(B), \ 1921 (__v4sf)(__m128)(W), (__mmask8)(U), \ 1924 #define _mm_maskz_add_round_ss(U, A, B, R) \ 1925 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \ 1926 (__v4sf)(__m128)(B), \ 1927 (__v4sf)_mm_setzero_ps(), \ 1928 (__mmask8)(U), (int)(R)) 1933 return __builtin_ia32_selectsd_128(__U, __A, __W);
1941 #define _mm_add_round_sd(A, B, R) \ 1942 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1943 (__v2df)(__m128d)(B), \ 1944 (__v2df)_mm_setzero_pd(), \ 1945 (__mmask8)-1, (int)(R)) 1947 #define _mm_mask_add_round_sd(W, U, A, B, R) \ 1948 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1949 (__v2df)(__m128d)(B), \ 1950 (__v2df)(__m128d)(W), \ 1951 (__mmask8)(U), (int)(R)) 1953 #define _mm_maskz_add_round_sd(U, A, B, R) \ 1954 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \ 1955 (__v2df)(__m128d)(B), \ 1956 (__v2df)_mm_setzero_pd(), \ 1957 (__mmask8)(U), (int)(R)) 1961 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1968 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1975 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1982 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1987 #define _mm512_add_round_pd(A, B, R) \ 1988 (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \ 1989 (__v8df)(__m512d)(B), (int)(R)) 1991 #define _mm512_mask_add_round_pd(W, U, A, B, R) \ 1992 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1993 (__v8df)_mm512_add_round_pd((A), (B), (R)), \ 1994 (__v8df)(__m512d)(W)); 1996 #define _mm512_maskz_add_round_pd(U, A, B, R) \ 1997 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 1998 (__v8df)_mm512_add_round_pd((A), (B), (R)), \ 1999 (__v8df)_mm512_setzero_pd()); 2001 #define _mm512_add_round_ps(A, B, R) \ 2002 (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \ 2003 (__v16sf)(__m512)(B), (int)(R)) 2005 #define _mm512_mask_add_round_ps(W, U, A, B, R) \ 2006 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2007 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ 2008 (__v16sf)(__m512)(W)); 2010 #define _mm512_maskz_add_round_ps(U, A, B, R) \ 2011 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2012 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ 2013 (__v16sf)_mm512_setzero_ps()); 2018 return __builtin_ia32_selectss_128(__U, __A, __W);
2026 #define _mm_sub_round_ss(A, B, R) \ 2027 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2028 (__v4sf)(__m128)(B), \ 2029 (__v4sf)_mm_setzero_ps(), \ 2030 (__mmask8)-1, (int)(R)) 2032 #define _mm_mask_sub_round_ss(W, U, A, B, R) \ 2033 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2034 (__v4sf)(__m128)(B), \ 2035 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2038 #define _mm_maskz_sub_round_ss(U, A, B, R) \ 2039 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \ 2040 (__v4sf)(__m128)(B), \ 2041 (__v4sf)_mm_setzero_ps(), \ 2042 (__mmask8)(U), (int)(R)) 2047 return __builtin_ia32_selectsd_128(__U, __A, __W);
2056 #define _mm_sub_round_sd(A, B, R) \ 2057 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2058 (__v2df)(__m128d)(B), \ 2059 (__v2df)_mm_setzero_pd(), \ 2060 (__mmask8)-1, (int)(R)) 2062 #define _mm_mask_sub_round_sd(W, U, A, B, R) \ 2063 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2064 (__v2df)(__m128d)(B), \ 2065 (__v2df)(__m128d)(W), \ 2066 (__mmask8)(U), (int)(R)) 2068 #define _mm_maskz_sub_round_sd(U, A, B, R) \ 2069 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \ 2070 (__v2df)(__m128d)(B), \ 2071 (__v2df)_mm_setzero_pd(), \ 2072 (__mmask8)(U), (int)(R)) 2076 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2083 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2090 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2097 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2102 #define _mm512_sub_round_pd(A, B, R) \ 2103 (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \ 2104 (__v8df)(__m512d)(B), (int)(R)) 2106 #define _mm512_mask_sub_round_pd(W, U, A, B, R) \ 2107 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2108 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ 2109 (__v8df)(__m512d)(W)); 2111 #define _mm512_maskz_sub_round_pd(U, A, B, R) \ 2112 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2113 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ 2114 (__v8df)_mm512_setzero_pd()); 2116 #define _mm512_sub_round_ps(A, B, R) \ 2117 (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \ 2118 (__v16sf)(__m512)(B), (int)(R)) 2120 #define _mm512_mask_sub_round_ps(W, U, A, B, R) \ 2121 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2122 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ 2123 (__v16sf)(__m512)(W)); 2125 #define _mm512_maskz_sub_round_ps(U, A, B, R) \ 2126 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2127 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ 2128 (__v16sf)_mm512_setzero_ps()); 2133 return __builtin_ia32_selectss_128(__U, __A, __W);
2141 #define _mm_mul_round_ss(A, B, R) \ 2142 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2143 (__v4sf)(__m128)(B), \ 2144 (__v4sf)_mm_setzero_ps(), \ 2145 (__mmask8)-1, (int)(R)) 2147 #define _mm_mask_mul_round_ss(W, U, A, B, R) \ 2148 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2149 (__v4sf)(__m128)(B), \ 2150 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2153 #define _mm_maskz_mul_round_ss(U, A, B, R) \ 2154 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \ 2155 (__v4sf)(__m128)(B), \ 2156 (__v4sf)_mm_setzero_ps(), \ 2157 (__mmask8)(U), (int)(R)) 2162 return __builtin_ia32_selectsd_128(__U, __A, __W);
2171 #define _mm_mul_round_sd(A, B, R) \ 2172 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2173 (__v2df)(__m128d)(B), \ 2174 (__v2df)_mm_setzero_pd(), \ 2175 (__mmask8)-1, (int)(R)) 2177 #define _mm_mask_mul_round_sd(W, U, A, B, R) \ 2178 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2179 (__v2df)(__m128d)(B), \ 2180 (__v2df)(__m128d)(W), \ 2181 (__mmask8)(U), (int)(R)) 2183 #define _mm_maskz_mul_round_sd(U, A, B, R) \ 2184 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \ 2185 (__v2df)(__m128d)(B), \ 2186 (__v2df)_mm_setzero_pd(), \ 2187 (__mmask8)(U), (int)(R)) 2191 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2198 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2205 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2212 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2217 #define _mm512_mul_round_pd(A, B, R) \ 2218 (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \ 2219 (__v8df)(__m512d)(B), (int)(R)) 2221 #define _mm512_mask_mul_round_pd(W, U, A, B, R) \ 2222 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2223 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ 2224 (__v8df)(__m512d)(W)); 2226 #define _mm512_maskz_mul_round_pd(U, A, B, R) \ 2227 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2228 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ 2229 (__v8df)_mm512_setzero_pd()); 2231 #define _mm512_mul_round_ps(A, B, R) \ 2232 (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ 2233 (__v16sf)(__m512)(B), (int)(R)) 2235 #define _mm512_mask_mul_round_ps(W, U, A, B, R) \ 2236 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2237 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ 2238 (__v16sf)(__m512)(W)); 2240 #define _mm512_maskz_mul_round_ps(U, A, B, R) \ 2241 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2242 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ 2243 (__v16sf)_mm512_setzero_ps()); 2248 return __builtin_ia32_selectss_128(__U, __A, __W);
2257 #define _mm_div_round_ss(A, B, R) \ 2258 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2259 (__v4sf)(__m128)(B), \ 2260 (__v4sf)_mm_setzero_ps(), \ 2261 (__mmask8)-1, (int)(R)) 2263 #define _mm_mask_div_round_ss(W, U, A, B, R) \ 2264 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2265 (__v4sf)(__m128)(B), \ 2266 (__v4sf)(__m128)(W), (__mmask8)(U), \ 2269 #define _mm_maskz_div_round_ss(U, A, B, R) \ 2270 (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \ 2271 (__v4sf)(__m128)(B), \ 2272 (__v4sf)_mm_setzero_ps(), \ 2273 (__mmask8)(U), (int)(R)) 2278 return __builtin_ia32_selectsd_128(__U, __A, __W);
2287 #define _mm_div_round_sd(A, B, R) \ 2288 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2289 (__v2df)(__m128d)(B), \ 2290 (__v2df)_mm_setzero_pd(), \ 2291 (__mmask8)-1, (int)(R)) 2293 #define _mm_mask_div_round_sd(W, U, A, B, R) \ 2294 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2295 (__v2df)(__m128d)(B), \ 2296 (__v2df)(__m128d)(W), \ 2297 (__mmask8)(U), (int)(R)) 2299 #define _mm_maskz_div_round_sd(U, A, B, R) \ 2300 (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \ 2301 (__v2df)(__m128d)(B), \ 2302 (__v2df)_mm_setzero_pd(), \ 2303 (__mmask8)(U), (int)(R)) 2308 return (__m512d)((__v8df)__a/(__v8df)
__b);
2313 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2320 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2328 return (__m512)((__v16sf)__a/(__v16sf)
__b);
2333 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2340 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2345 #define _mm512_div_round_pd(A, B, R) \ 2346 (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \ 2347 (__v8df)(__m512d)(B), (int)(R)) 2349 #define _mm512_mask_div_round_pd(W, U, A, B, R) \ 2350 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2351 (__v8df)_mm512_div_round_pd((A), (B), (R)), \ 2352 (__v8df)(__m512d)(W)); 2354 #define _mm512_maskz_div_round_pd(U, A, B, R) \ 2355 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 2356 (__v8df)_mm512_div_round_pd((A), (B), (R)), \ 2357 (__v8df)_mm512_setzero_pd()); 2359 #define _mm512_div_round_ps(A, B, R) \ 2360 (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ 2361 (__v16sf)(__m512)(B), (int)(R)) 2363 #define _mm512_mask_div_round_ps(W, U, A, B, R) \ 2364 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2365 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ 2366 (__v16sf)(__m512)(W)); 2368 #define _mm512_maskz_div_round_ps(U, A, B, R) \ 2369 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 2370 (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ 2371 (__v16sf)_mm512_setzero_ps()); 2373 #define _mm512_roundscale_ps(A, B) \ 2374 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ 2375 (__v16sf)_mm512_undefined_ps(), \ 2377 _MM_FROUND_CUR_DIRECTION) 2379 #define _mm512_mask_roundscale_ps(A, B, C, imm) \ 2380 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2381 (__v16sf)(__m512)(A), (__mmask16)(B), \ 2382 _MM_FROUND_CUR_DIRECTION) 2384 #define _mm512_maskz_roundscale_ps(A, B, imm) \ 2385 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \ 2386 (__v16sf)_mm512_setzero_ps(), \ 2388 _MM_FROUND_CUR_DIRECTION) 2390 #define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \ 2391 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \ 2392 (__v16sf)(__m512)(A), (__mmask16)(B), \ 2395 #define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \ 2396 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), 
(int)(imm), \ 2397 (__v16sf)_mm512_setzero_ps(), \ 2398 (__mmask16)(A), (int)(R)) 2400 #define _mm512_roundscale_round_ps(A, imm, R) \ 2401 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \ 2402 (__v16sf)_mm512_undefined_ps(), \ 2403 (__mmask16)-1, (int)(R)) 2405 #define _mm512_roundscale_pd(A, B) \ 2406 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \ 2407 (__v8df)_mm512_undefined_pd(), \ 2409 _MM_FROUND_CUR_DIRECTION) 2411 #define _mm512_mask_roundscale_pd(A, B, C, imm) \ 2412 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2413 (__v8df)(__m512d)(A), (__mmask8)(B), \ 2414 _MM_FROUND_CUR_DIRECTION) 2416 #define _mm512_maskz_roundscale_pd(A, B, imm) \ 2417 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2418 (__v8df)_mm512_setzero_pd(), \ 2420 _MM_FROUND_CUR_DIRECTION) 2422 #define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \ 2423 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \ 2424 (__v8df)(__m512d)(A), (__mmask8)(B), \ 2427 #define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \ 2428 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \ 2429 (__v8df)_mm512_setzero_pd(), \ 2430 (__mmask8)(A), (int)(R)) 2432 #define _mm512_roundscale_round_pd(A, imm, R) \ 2433 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \ 2434 (__v8df)_mm512_undefined_pd(), \ 2435 (__mmask8)-1, (int)(R)) 2437 #define _mm512_fmadd_round_pd(A, B, C, R) \ 2438 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2439 (__v8df)(__m512d)(B), \ 2440 (__v8df)(__m512d)(C), \ 2441 (__mmask8)-1, (int)(R)) 2444 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ 2445 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2446 (__v8df)(__m512d)(B), \ 2447 (__v8df)(__m512d)(C), \ 2448 (__mmask8)(U), (int)(R)) 2451 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ 2452 
(__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \ 2453 (__v8df)(__m512d)(B), \ 2454 (__v8df)(__m512d)(C), \ 2455 (__mmask8)(U), (int)(R)) 2458 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ 2459 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2460 (__v8df)(__m512d)(B), \ 2461 (__v8df)(__m512d)(C), \ 2462 (__mmask8)(U), (int)(R)) 2465 #define _mm512_fmsub_round_pd(A, B, C, R) \ 2466 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2467 (__v8df)(__m512d)(B), \ 2468 -(__v8df)(__m512d)(C), \ 2469 (__mmask8)-1, (int)(R)) 2472 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \ 2473 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 2474 (__v8df)(__m512d)(B), \ 2475 -(__v8df)(__m512d)(C), \ 2476 (__mmask8)(U), (int)(R)) 2479 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \ 2480 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \ 2481 (__v8df)(__m512d)(B), \ 2482 -(__v8df)(__m512d)(C), \ 2483 (__mmask8)(U), (int)(R)) 2486 #define _mm512_fnmadd_round_pd(A, B, C, R) \ 2487 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2488 (__v8df)(__m512d)(B), \ 2489 (__v8df)(__m512d)(C), \ 2490 (__mmask8)-1, (int)(R)) 2493 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ 2494 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \ 2495 (__v8df)(__m512d)(B), \ 2496 (__v8df)(__m512d)(C), \ 2497 (__mmask8)(U), (int)(R)) 2500 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ 2501 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 2502 (__v8df)(__m512d)(B), \ 2503 (__v8df)(__m512d)(C), \ 2504 (__mmask8)(U), (int)(R)) 2507 #define _mm512_fnmsub_round_pd(A, B, C, R) \ 2508 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \ 2509 (__v8df)(__m512d)(B), \ 2510 -(__v8df)(__m512d)(C), \ 2511 (__mmask8)-1, (int)(R)) 2514 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ 2515 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \ 
2516 (__v8df)(__m512d)(B), \ 2517 -(__v8df)(__m512d)(C), \ 2518 (__mmask8)(U), (int)(R)) 2524 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2534 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2544 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2554 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2564 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2574 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2584 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2594 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2604 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2614 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2624 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2634 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
/* 512-bit single-precision FMA family with explicit rounding control.
 * All variants lower to the vfmaddps512 builtins; masked forms select the
 * passthrough/zero lanes via the builtin's mask operand. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

/* fnmadd negates one multiplicand; -(A*B)+C == (A)*(-B)+C here. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
(__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2706 (__v16sf)(__m512)(B), \ 2707 (__v16sf)(__m512)(C), \ 2708 (__mmask16)(U), (int)(R)) 2711 #define _mm512_fnmsub_round_ps(A, B, C, R) \ 2712 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 2713 -(__v16sf)(__m512)(B), \ 2714 -(__v16sf)(__m512)(C), \ 2715 (__mmask16)-1, (int)(R)) 2718 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ 2719 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \ 2720 (__v16sf)(__m512)(B), \ 2721 -(__v16sf)(__m512)(C), \ 2722 (__mmask16)(U), (int)(R)) 2728 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2738 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2748 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2758 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2768 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2778 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2788 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2798 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2808 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2818 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2828 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2838 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2845 #define _mm512_fmaddsub_round_pd(A, B, C, R) \ 2846 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2847 (__v8df)(__m512d)(B), \ 2848 (__v8df)(__m512d)(C), \ 2849 (__mmask8)-1, (int)(R)) 2852 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \ 2853 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2854 (__v8df)(__m512d)(B), \ 2855 (__v8df)(__m512d)(C), \ 2856 (__mmask8)(U), (int)(R)) 2859 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \ 2860 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \ 2861 (__v8df)(__m512d)(B), \ 2862 (__v8df)(__m512d)(C), \ 2863 (__mmask8)(U), (int)(R)) 2866 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \ 2867 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 2868 (__v8df)(__m512d)(B), \ 2869 (__v8df)(__m512d)(C), \ 2870 (__mmask8)(U), (int)(R)) 2873 #define _mm512_fmsubadd_round_pd(A, B, C, R) \ 2874 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2875 (__v8df)(__m512d)(B), \ 2876 -(__v8df)(__m512d)(C), \ 2877 (__mmask8)-1, (int)(R)) 2880 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ 2881 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \ 2882 (__v8df)(__m512d)(B), \ 2883 -(__v8df)(__m512d)(C), \ 2884 (__mmask8)(U), (int)(R)) 2887 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ 2888 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \ 2889 (__v8df)(__m512d)(B), \ 2890 -(__v8df)(__m512d)(C), \ 2891 (__mmask8)(U), (int)(R)) 2897 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2907 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2917 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2927 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2937 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2947 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2957 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2964 #define _mm512_fmaddsub_round_ps(A, B, C, R) \ 2965 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 2966 (__v16sf)(__m512)(B), \ 2967 (__v16sf)(__m512)(C), \ 2968 (__mmask16)-1, (int)(R)) 2971 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \ 2972 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 2973 (__v16sf)(__m512)(B), \ 2974 (__v16sf)(__m512)(C), \ 2975 (__mmask16)(U), (int)(R)) 2978 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \ 2979 (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \ 2980 (__v16sf)(__m512)(B), \ 2981 (__v16sf)(__m512)(C), \ 2982 (__mmask16)(U), (int)(R)) 2985 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ 2986 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 2987 (__v16sf)(__m512)(B), \ 2988 (__v16sf)(__m512)(C), \ 2989 (__mmask16)(U), (int)(R)) 2992 #define _mm512_fmsubadd_round_ps(A, B, C, R) \ 2993 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 2994 (__v16sf)(__m512)(B), \ 2995 -(__v16sf)(__m512)(C), \ 2996 (__mmask16)-1, (int)(R)) 2999 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ 3000 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \ 3001 (__v16sf)(__m512)(B), \ 3002 -(__v16sf)(__m512)(C), \ 3003 (__mmask16)(U), (int)(R)) 3006 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ 3007 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \ 3008 (__v16sf)(__m512)(B), \ 3009 -(__v16sf)(__m512)(C), \ 3010 (__mmask16)(U), (int)(R)) 3016 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3026 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3036 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3046 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3056 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3066 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3076 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3083 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \ 3084 (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \ 3085 (__v8df)(__m512d)(B), \ 3086 (__v8df)(__m512d)(C), \ 3087 (__mmask8)(U), (int)(R)) 3093 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3100 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \ 3101 (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \ 3102 (__v16sf)(__m512)(B), \ 3103 (__v16sf)(__m512)(C), \ 3104 (__mmask16)(U), (int)(R)) 3109 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3116 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ 3117 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \ 3118 (__v8df)(__m512d)(B), \ 3119 (__v8df)(__m512d)(C), \ 3120 (__mmask8)(U), (int)(R)) 3126 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3133 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ 3134 (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \ 3135 (__v16sf)(__m512)(B), \ 3136 (__v16sf)(__m512)(C), \ 3137 (__mmask16)(U), (int)(R)) 3143 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3150 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ 3151 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 3152 -(__v8df)(__m512d)(B), \ 3153 (__v8df)(__m512d)(C), \ 3154 (__mmask8)(U), (int)(R)) 3160 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3167 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ 3168 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 3169 -(__v16sf)(__m512)(B), \ 3170 (__v16sf)(__m512)(C), \ 3171 (__mmask16)(U), (int)(R)) 3177 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3184 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ 3185 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \ 3186 -(__v8df)(__m512d)(B), \ 3187 -(__v8df)(__m512d)(C), \ 3188 (__mmask8)(U), (int)(R)) 3191 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ 3192 (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \ 3193 (__v8df)(__m512d)(B), \ 3194 (__v8df)(__m512d)(C), \ 3195 (__mmask8)(U), (int)(R)) 3201 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3211 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3218 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ 3219 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \ 3220 -(__v16sf)(__m512)(B), \ 3221 -(__v16sf)(__m512)(C), \ 3222 (__mmask16)(U), (int)(R)) 3225 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ 3226 (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \ 3227 (__v16sf)(__m512)(B), \ 3228 (__v16sf)(__m512)(C), \ 3229 (__mmask16)(U), (int)(R)) 3235 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3245 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3259 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3267 return (__m512i)__builtin_ia32_selectd_512(__U,
3276 return (__m512i)__builtin_ia32_selectd_512(__U,
3285 return (__m512i)__builtin_ia32_selectd_512(__U,
3293 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3301 return (__m512i)__builtin_ia32_selectq_512(__U,
3310 return (__m512i)__builtin_ia32_selectq_512(__U,
3319 return (__m512i)__builtin_ia32_selectq_512(__U,
3324 #define _mm512_alignr_epi64(A, B, I) \ 3325 (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \ 3326 (__v8di)(__m512i)(B), (int)(I)) 3328 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) \ 3329 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 3330 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3331 (__v8di)(__m512i)(W)) 3333 #define _mm512_maskz_alignr_epi64(U, A, B, imm) \ 3334 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 3335 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \ 3336 (__v8di)_mm512_setzero_si512()) 3338 #define _mm512_alignr_epi32(A, B, I) \ 3339 (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \ 3340 (__v16si)(__m512i)(B), (int)(I)) 3342 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) \ 3343 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 3344 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3345 (__v16si)(__m512i)(W)) 3347 #define _mm512_maskz_alignr_epi32(U, A, B, imm) \ 3348 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 3349 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \ 3350 (__v16si)_mm512_setzero_si512()) 3353 #define _mm512_extractf64x4_pd(A, I) \ 3354 (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ 3355 (__v4df)_mm256_undefined_pd(), \ 3358 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \ 3359 (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ 3360 (__v4df)(__m256d)(W), \ 3363 #define _mm512_maskz_extractf64x4_pd(U, A, imm) \ 3364 (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ 3365 (__v4df)_mm256_setzero_pd(), \ 3368 #define _mm512_extractf32x4_ps(A, I) \ 3369 (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ 3370 (__v4sf)_mm_undefined_ps(), \ 3373 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \ 3374 (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ 3375 (__v4sf)(__m128)(W), \ 3378 #define _mm512_maskz_extractf32x4_ps(U, A, imm) \ 3379 
(__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ 3380 (__v4sf)_mm_setzero_ps(), \ 3388 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3396 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3404 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3412 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3419 #define _mm512_cmp_round_ps_mask(A, B, P, R) \ 3420 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3421 (__v16sf)(__m512)(B), (int)(P), \ 3422 (__mmask16)-1, (int)(R)) 3424 #define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \ 3425 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 3426 (__v16sf)(__m512)(B), (int)(P), \ 3427 (__mmask16)(U), (int)(R)) 3429 #define _mm512_cmp_ps_mask(A, B, P) \ 3430 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3431 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 3432 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3434 #define _mm512_cmpeq_ps_mask(A, B) \ 3435 _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) 3436 #define _mm512_mask_cmpeq_ps_mask(k, A, B) \ 3437 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) 3439 #define _mm512_cmplt_ps_mask(A, B) \ 3440 _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS) 3441 #define _mm512_mask_cmplt_ps_mask(k, A, B) \ 3442 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) 3444 #define _mm512_cmple_ps_mask(A, B) \ 3445 _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) 3446 #define _mm512_mask_cmple_ps_mask(k, A, B) \ 3447 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS) 3449 #define _mm512_cmpunord_ps_mask(A, B) \ 3450 _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q) 3451 #define _mm512_mask_cmpunord_ps_mask(k, A, B) \ 3452 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q) 3454 #define _mm512_cmpneq_ps_mask(A, B) \ 3455 _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ) 3456 #define _mm512_mask_cmpneq_ps_mask(k, A, B) \ 3457 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ) 3459 #define _mm512_cmpnlt_ps_mask(A, B) \ 3460 _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US) 3461 #define _mm512_mask_cmpnlt_ps_mask(k, A, B) \ 3462 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US) 3464 #define _mm512_cmpnle_ps_mask(A, B) \ 3465 _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US) 3466 #define _mm512_mask_cmpnle_ps_mask(k, A, B) \ 3467 _mm512_mask_cmp_ps_mask((k), (A), 
(B), _CMP_NLE_US) 3469 #define _mm512_cmpord_ps_mask(A, B) \ 3470 _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q) 3471 #define _mm512_mask_cmpord_ps_mask(k, A, B) \ 3472 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) 3474 #define _mm512_cmp_round_pd_mask(A, B, P, R) \ 3475 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3476 (__v8df)(__m512d)(B), (int)(P), \ 3477 (__mmask8)-1, (int)(R)) 3479 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \ 3480 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 3481 (__v8df)(__m512d)(B), (int)(P), \ 3482 (__mmask8)(U), (int)(R)) 3484 #define _mm512_cmp_pd_mask(A, B, P) \ 3485 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3486 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \ 3487 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 3489 #define _mm512_cmpeq_pd_mask(A, B) \ 3490 _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ) 3491 #define _mm512_mask_cmpeq_pd_mask(k, A, B) \ 3492 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ) 3494 #define _mm512_cmplt_pd_mask(A, B) \ 3495 _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS) 3496 #define _mm512_mask_cmplt_pd_mask(k, A, B) \ 3497 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS) 3499 #define _mm512_cmple_pd_mask(A, B) \ 3500 _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS) 3501 #define _mm512_mask_cmple_pd_mask(k, A, B) \ 3502 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS) 3504 #define _mm512_cmpunord_pd_mask(A, B) \ 3505 _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q) 3506 #define _mm512_mask_cmpunord_pd_mask(k, A, B) \ 3507 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q) 3509 #define _mm512_cmpneq_pd_mask(A, B) \ 3510 _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ) 3511 #define _mm512_mask_cmpneq_pd_mask(k, A, B) \ 3512 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ) 3514 #define _mm512_cmpnlt_pd_mask(A, B) \ 3515 _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US) 3516 #define _mm512_mask_cmpnlt_pd_mask(k, A, B) \ 3517 
_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US) 3519 #define _mm512_cmpnle_pd_mask(A, B) \ 3520 _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US) 3521 #define _mm512_mask_cmpnle_pd_mask(k, A, B) \ 3522 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US) 3524 #define _mm512_cmpord_pd_mask(A, B) \ 3525 _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q) 3526 #define _mm512_mask_cmpord_pd_mask(k, A, B) \ 3527 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q) 3531 #define _mm512_cvtt_roundps_epu32(A, R) \ 3532 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3533 (__v16si)_mm512_undefined_epi32(), \ 3534 (__mmask16)-1, (int)(R)) 3536 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \ 3537 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3538 (__v16si)(__m512i)(W), \ 3539 (__mmask16)(U), (int)(R)) 3541 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \ 3542 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \ 3543 (__v16si)_mm512_setzero_si512(), \ 3544 (__mmask16)(U), (int)(R)) 3550 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3560 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3569 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3575 #define _mm512_cvt_roundepi32_ps(A, R) \ 3576 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3577 (__v16sf)_mm512_setzero_ps(), \ 3578 (__mmask16)-1, (int)(R)) 3580 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \ 3581 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3582 (__v16sf)(__m512)(W), \ 3583 (__mmask16)(U), (int)(R)) 3585 #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \ 3586 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \ 3587 (__v16sf)_mm512_setzero_ps(), \ 3588 (__mmask16)(U), (int)(R)) 3590 #define _mm512_cvt_roundepu32_ps(A, R) \ 3591 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3592 (__v16sf)_mm512_setzero_ps(), \ 3593 (__mmask16)-1, (int)(R)) 3595 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \ 3596 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3597 (__v16sf)(__m512)(W), \ 3598 (__mmask16)(U), (int)(R)) 3600 #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \ 3601 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \ 3602 (__v16sf)_mm512_setzero_ps(), \ 3603 (__mmask16)(U), (int)(R)) 3608 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3614 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3622 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3630 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3636 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3644 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3664 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3670 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3678 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3686 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3692 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3700 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3717 #define _mm512_cvt_roundpd_ps(A, R) \ 3718 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3719 (__v8sf)_mm256_setzero_ps(), \ 3720 (__mmask8)-1, (int)(R)) 3722 #define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \ 3723 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3724 (__v8sf)(__m256)(W), (__mmask8)(U), \ 3727 #define _mm512_maskz_cvt_roundpd_ps(U, A, R) \ 3728 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \ 3729 (__v8sf)_mm256_setzero_ps(), \ 3730 (__mmask8)(U), (int)(R)) 3735 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3744 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3753 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3762 return (__m512) __builtin_shufflevector((__v8sf)
_mm512_cvtpd_ps(__A),
3764 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3770 return (__m512) __builtin_shufflevector (
3774 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3777 #define _mm512_cvt_roundps_ph(A, I) \ 3778 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3779 (__v16hi)_mm256_undefined_si256(), \ 3782 #define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ 3783 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3784 (__v16hi)(__m256i)(U), \ 3787 #define _mm512_maskz_cvt_roundps_ph(W, A, I) \ 3788 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3789 (__v16hi)_mm256_setzero_si256(), \ 3792 #define _mm512_cvtps_ph(A, I) \ 3793 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3794 (__v16hi)_mm256_setzero_si256(), \ 3797 #define _mm512_mask_cvtps_ph(U, W, A, I) \ 3798 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3799 (__v16hi)(__m256i)(U), \ 3802 #define _mm512_maskz_cvtps_ph(W, A, I) \ 3803 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ 3804 (__v16hi)_mm256_setzero_si256(), \ 3807 #define _mm512_cvt_roundph_ps(A, R) \ 3808 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3809 (__v16sf)_mm512_undefined_ps(), \ 3810 (__mmask16)-1, (int)(R)) 3812 #define _mm512_mask_cvt_roundph_ps(W, U, A, R) \ 3813 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3814 (__v16sf)(__m512)(W), \ 3815 (__mmask16)(U), (int)(R)) 3817 #define _mm512_maskz_cvt_roundph_ps(U, A, R) \ 3818 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ 3819 (__v16sf)_mm512_setzero_ps(), \ 3820 (__mmask16)(U), (int)(R)) 3826 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3836 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3845 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3851 #define _mm512_cvtt_roundpd_epi32(A, R) \ 3852 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3853 (__v8si)_mm256_setzero_si256(), \ 3854 (__mmask8)-1, (int)(R)) 3856 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \ 3857 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3858 (__v8si)(__m256i)(W), \ 3859 (__mmask8)(U), (int)(R)) 3861 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \ 3862 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \ 3863 (__v8si)_mm256_setzero_si256(), \ 3864 (__mmask8)(U), (int)(R)) 3869 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3878 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3887 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3893 #define _mm512_cvtt_roundps_epi32(A, R) \ 3894 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3895 (__v16si)_mm512_setzero_si512(), \ 3896 (__mmask16)-1, (int)(R)) 3898 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \ 3899 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3900 (__v16si)(__m512i)(W), \ 3901 (__mmask16)(U), (int)(R)) 3903 #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \ 3904 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \ 3905 (__v16si)_mm512_setzero_si512(), \ 3906 (__mmask16)(U), (int)(R)) 3912 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3920 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3929 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3935 #define _mm512_cvt_roundps_epi32(A, R) \ 3936 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 3937 (__v16si)_mm512_setzero_si512(), \ 3938 (__mmask16)-1, (int)(R)) 3940 #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \ 3941 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 3942 (__v16si)(__m512i)(W), \ 3943 (__mmask16)(U), (int)(R)) 3945 #define _mm512_maskz_cvt_roundps_epi32(U, A, R) \ 3946 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \ 3947 (__v16si)_mm512_setzero_si512(), \ 3948 (__mmask16)(U), (int)(R)) 3953 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3962 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3971 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3978 #define _mm512_cvt_roundpd_epi32(A, R) \ 3979 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3980 (__v8si)_mm256_setzero_si256(), \ 3981 (__mmask8)-1, (int)(R)) 3983 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \ 3984 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3985 (__v8si)(__m256i)(W), \ 3986 (__mmask8)(U), (int)(R)) 3988 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \ 3989 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \ 3990 (__v8si)_mm256_setzero_si256(), \ 3991 (__mmask8)(U), (int)(R)) 3996 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4006 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4015 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4022 #define _mm512_cvt_roundps_epu32(A, R) \ 4023 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4024 (__v16si)_mm512_setzero_si512(), \ 4025 (__mmask16)-1, (int)(R)) 4027 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \ 4028 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4029 (__v16si)(__m512i)(W), \ 4030 (__mmask16)(U), (int)(R)) 4032 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) \ 4033 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \ 4034 (__v16si)_mm512_setzero_si512(), \ 4035 (__mmask16)(U), (int)(R)) 4040 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4050 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4059 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4066 #define _mm512_cvt_roundpd_epu32(A, R) \ 4067 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4068 (__v8si)_mm256_setzero_si256(), \ 4069 (__mmask8)-1, (int)(R)) 4071 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \ 4072 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4073 (__v8si)(__m256i)(W), \ 4074 (__mmask8)(U), (int)(R)) 4076 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \ 4077 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \ 4078 (__v8si)_mm256_setzero_si256(), \ 4079 (__mmask8)(U), (int)(R)) 4084 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4094 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4103 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4127 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4128 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4134 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4142 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4150 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4151 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4157 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4165 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4173 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4175 2+4, 18+4, 3+4, 19+4,
4176 2+8, 18+8, 3+8, 19+8,
4177 2+12, 18+12, 3+12, 19+12);
4183 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4191 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4199 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4201 0+4, 16+4, 1+4, 17+4,
4202 0+8, 16+8, 1+8, 17+8,
4203 0+12, 16+12, 1+12, 17+12);
4209 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4217 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4225 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4227 2+4, 18+4, 3+4, 19+4,
4228 2+8, 18+8, 3+8, 19+8,
4229 2+12, 18+12, 3+12, 19+12);
4235 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4243 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4251 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4253 0+4, 16+4, 1+4, 17+4,
4254 0+8, 16+8, 1+8, 17+8,
4255 0+12, 16+12, 1+12, 17+12);
4261 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4269 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4277 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4278 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4284 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4292 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4300 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4301 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4307 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4315 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4326 struct __loadu_si512 {
4329 return ((
struct __loadu_si512*)__P)->__v;
4335 struct __loadu_epi32 {
4338 return ((
struct __loadu_epi32*)__P)->__v;
4344 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *) __P,
4353 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)__P,
4362 struct __loadu_epi64 {
4365 return ((
struct __loadu_epi64*)__P)->__v;
4371 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *) __P,
4379 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)__P,
4388 return (__m512) __builtin_ia32_loadups512_mask ((
const float *) __P,
4396 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)__P,
4405 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *) __P,
4413 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)__P,
4425 return ((
struct __loadu_pd*)__p)->__v;
4434 return ((
struct __loadu_ps*)__p)->__v;
4440 return *(__m512*)__p;
4446 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *) __P,
4454 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)__P,
4463 return *(__m512d*)__p;
4469 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *) __P,
4477 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)__P,
4486 return *(__m512i *) __P;
4492 return *(__m512i *) __P;
4498 return *(__m512i *) __P;
4506 struct __storeu_epi64 {
4509 ((
struct __storeu_epi64*)__P)->__v = __A;
4515 __builtin_ia32_storedqudi512_mask ((
long long *)__P, (__v8di) __A,
4522 struct __storeu_si512 {
4525 ((
struct __storeu_si512*)__P)->__v = __A;
4531 struct __storeu_epi32 {
4534 ((
struct __storeu_epi32*)__P)->__v = __A;
4540 __builtin_ia32_storedqusi512_mask ((
int *)__P, (__v16si) __A,
4547 __builtin_ia32_storeupd512_mask ((
double *)__P, (__v8df) __A, (__mmask8) __U);
4553 struct __storeu_pd {
4556 ((
struct __storeu_pd*)__P)->__v = __A;
4562 __builtin_ia32_storeups512_mask ((
float *)__P, (__v16sf) __A,
4569 struct __storeu_ps {
4572 ((
struct __storeu_ps*)__P)->__v = __A;
4578 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4584 *(__m512d*)__P = __A;
4590 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4597 *(__m512*)__P = __A;
4603 *(__m512i *) __P = __A;
4609 *(__m512i *) __P = __A;
4615 *(__m512i *) __P = __A;
4623 return __builtin_ia32_knothi(__M);
4628 #define _mm512_cmpeq_epi32_mask(A, B) \ 4629 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 4630 #define _mm512_mask_cmpeq_epi32_mask(k, A, B) \ 4631 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 4632 #define _mm512_cmpge_epi32_mask(A, B) \ 4633 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 4634 #define _mm512_mask_cmpge_epi32_mask(k, A, B) \ 4635 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 4636 #define _mm512_cmpgt_epi32_mask(A, B) \ 4637 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 4638 #define _mm512_mask_cmpgt_epi32_mask(k, A, B) \ 4639 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 4640 #define _mm512_cmple_epi32_mask(A, B) \ 4641 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 4642 #define _mm512_mask_cmple_epi32_mask(k, A, B) \ 4643 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 4644 #define _mm512_cmplt_epi32_mask(A, B) \ 4645 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 4646 #define _mm512_mask_cmplt_epi32_mask(k, A, B) \ 4647 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 4648 #define _mm512_cmpneq_epi32_mask(A, B) \ 4649 _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 4650 #define _mm512_mask_cmpneq_epi32_mask(k, A, B) \ 4651 _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 4653 #define _mm512_cmpeq_epu32_mask(A, B) \ 4654 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 4655 #define _mm512_mask_cmpeq_epu32_mask(k, A, B) \ 4656 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 4657 #define _mm512_cmpge_epu32_mask(A, B) \ 4658 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 4659 #define _mm512_mask_cmpge_epu32_mask(k, A, B) \ 4660 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 4661 #define _mm512_cmpgt_epu32_mask(A, B) \ 4662 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 4663 #define _mm512_mask_cmpgt_epu32_mask(k, A, B) \ 4664 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 4665 #define _mm512_cmple_epu32_mask(A, B) \ 4666 _mm512_cmp_epu32_mask((A), (B), 
_MM_CMPINT_LE) 4667 #define _mm512_mask_cmple_epu32_mask(k, A, B) \ 4668 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 4669 #define _mm512_cmplt_epu32_mask(A, B) \ 4670 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 4671 #define _mm512_mask_cmplt_epu32_mask(k, A, B) \ 4672 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 4673 #define _mm512_cmpneq_epu32_mask(A, B) \ 4674 _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 4675 #define _mm512_mask_cmpneq_epu32_mask(k, A, B) \ 4676 _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 4678 #define _mm512_cmpeq_epi64_mask(A, B) \ 4679 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 4680 #define _mm512_mask_cmpeq_epi64_mask(k, A, B) \ 4681 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 4682 #define _mm512_cmpge_epi64_mask(A, B) \ 4683 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 4684 #define _mm512_mask_cmpge_epi64_mask(k, A, B) \ 4685 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 4686 #define _mm512_cmpgt_epi64_mask(A, B) \ 4687 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 4688 #define _mm512_mask_cmpgt_epi64_mask(k, A, B) \ 4689 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 4690 #define _mm512_cmple_epi64_mask(A, B) \ 4691 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 4692 #define _mm512_mask_cmple_epi64_mask(k, A, B) \ 4693 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 4694 #define _mm512_cmplt_epi64_mask(A, B) \ 4695 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 4696 #define _mm512_mask_cmplt_epi64_mask(k, A, B) \ 4697 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 4698 #define _mm512_cmpneq_epi64_mask(A, B) \ 4699 _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 4700 #define _mm512_mask_cmpneq_epi64_mask(k, A, B) \ 4701 _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 4703 #define _mm512_cmpeq_epu64_mask(A, B) \ 4704 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 4705 #define _mm512_mask_cmpeq_epu64_mask(k, A, B) \ 4706 
_mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 4707 #define _mm512_cmpge_epu64_mask(A, B) \ 4708 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 4709 #define _mm512_mask_cmpge_epu64_mask(k, A, B) \ 4710 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 4711 #define _mm512_cmpgt_epu64_mask(A, B) \ 4712 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 4713 #define _mm512_mask_cmpgt_epu64_mask(k, A, B) \ 4714 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 4715 #define _mm512_cmple_epu64_mask(A, B) \ 4716 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 4717 #define _mm512_mask_cmple_epu64_mask(k, A, B) \ 4718 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 4719 #define _mm512_cmplt_epu64_mask(A, B) \ 4720 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 4721 #define _mm512_mask_cmplt_epu64_mask(k, A, B) \ 4722 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 4723 #define _mm512_cmpneq_epu64_mask(A, B) \ 4724 _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 4725 #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ 4726 _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 4733 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4739 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4747 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4757 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4763 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4771 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4779 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4785 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4793 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4801 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4807 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4815 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4823 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4829 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4837 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4845 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4851 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4859 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4867 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4873 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4881 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4889 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4895 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4903 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4911 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4917 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4925 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4933 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4939 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4947 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4955 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4961 return (__m512i)__builtin_ia32_selectd_512(__U,
4969 return (__m512i)__builtin_ia32_selectd_512(__U,
4977 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4983 return (__m512i)__builtin_ia32_selectq_512(__U,
4991 return (__m512i)__builtin_ia32_selectq_512(__U,
4998 #define _mm512_cmp_epi32_mask(a, b, p) \ 4999 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5000 (__v16si)(__m512i)(b), (int)(p), \ 5003 #define _mm512_cmp_epu32_mask(a, b, p) \ 5004 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5005 (__v16si)(__m512i)(b), (int)(p), \ 5008 #define _mm512_cmp_epi64_mask(a, b, p) \ 5009 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5010 (__v8di)(__m512i)(b), (int)(p), \ 5013 #define _mm512_cmp_epu64_mask(a, b, p) \ 5014 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5015 (__v8di)(__m512i)(b), (int)(p), \ 5018 #define _mm512_mask_cmp_epi32_mask(m, a, b, p) \ 5019 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 5020 (__v16si)(__m512i)(b), (int)(p), \ 5023 #define _mm512_mask_cmp_epu32_mask(m, a, b, p) \ 5024 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 5025 (__v16si)(__m512i)(b), (int)(p), \ 5028 #define _mm512_mask_cmp_epi64_mask(m, a, b, p) \ 5029 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 5030 (__v8di)(__m512i)(b), (int)(p), \ 5033 #define _mm512_mask_cmp_epu64_mask(m, a, b, p) \ 5034 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 5035 (__v8di)(__m512i)(b), (int)(p), \ 5038 #define _mm512_rol_epi32(a, b) \ 5039 (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)) 5041 #define _mm512_mask_rol_epi32(W, U, a, b) \ 5042 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5043 (__v16si)_mm512_rol_epi32((a), (b)), \ 5044 (__v16si)(__m512i)(W)) 5046 #define _mm512_maskz_rol_epi32(U, a, b) \ 5047 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5048 (__v16si)_mm512_rol_epi32((a), (b)), \ 5049 (__v16si)_mm512_setzero_si512()) 5051 #define _mm512_rol_epi64(a, b) \ 5052 (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)) 5054 #define _mm512_mask_rol_epi64(W, U, a, b) \ 5055 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5056 (__v8di)_mm512_rol_epi64((a), (b)), \ 
5057 (__v8di)(__m512i)(W)) 5059 #define _mm512_maskz_rol_epi64(U, a, b) \ 5060 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5061 (__v8di)_mm512_rol_epi64((a), (b)), \ 5062 (__v8di)_mm512_setzero_si512()) 5067 return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5073 return (__m512i)__builtin_ia32_selectd_512(__U,
5081 return (__m512i)__builtin_ia32_selectd_512(__U,
5089 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5095 return (__m512i)__builtin_ia32_selectq_512(__U,
5103 return (__m512i)__builtin_ia32_selectq_512(__U,
5108 #define _mm512_ror_epi32(A, B) \ 5109 (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)) 5111 #define _mm512_mask_ror_epi32(W, U, A, B) \ 5112 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5113 (__v16si)_mm512_ror_epi32((A), (B)), \ 5114 (__v16si)(__m512i)(W)) 5116 #define _mm512_maskz_ror_epi32(U, A, B) \ 5117 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5118 (__v16si)_mm512_ror_epi32((A), (B)), \ 5119 (__v16si)_mm512_setzero_si512()) 5121 #define _mm512_ror_epi64(A, B) \ 5122 (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)) 5124 #define _mm512_mask_ror_epi64(W, U, A, B) \ 5125 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5126 (__v8di)_mm512_ror_epi64((A), (B)), \ 5127 (__v8di)(__m512i)(W)) 5129 #define _mm512_maskz_ror_epi64(U, A, B) \ 5130 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5131 (__v8di)_mm512_ror_epi64((A), (B)), \ 5132 (__v8di)_mm512_setzero_si512()) 5137 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5143 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5150 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5158 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5164 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5172 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5180 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5186 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5193 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5201 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5207 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5215 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5223 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5231 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5240 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5247 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5255 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5263 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5271 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5279 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5287 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5296 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5303 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5304 0, 0, 2, 2, 4, 4, 6, 6);
5310 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5318 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5323 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ 5324 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5325 (__v8df)(__m512d)(B), \ 5326 (__v8di)(__m512i)(C), (int)(imm), \ 5327 (__mmask8)-1, (int)(R)) 5329 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ 5330 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5331 (__v8df)(__m512d)(B), \ 5332 (__v8di)(__m512i)(C), (int)(imm), \ 5333 (__mmask8)(U), (int)(R)) 5335 #define _mm512_fixupimm_pd(A, B, C, imm) \ 5336 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5337 (__v8df)(__m512d)(B), \ 5338 (__v8di)(__m512i)(C), (int)(imm), \ 5340 _MM_FROUND_CUR_DIRECTION) 5342 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ 5343 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5344 (__v8df)(__m512d)(B), \ 5345 (__v8di)(__m512i)(C), (int)(imm), \ 5347 _MM_FROUND_CUR_DIRECTION) 5349 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ 5350 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5351 (__v8df)(__m512d)(B), \ 5352 (__v8di)(__m512i)(C), \ 5353 (int)(imm), (__mmask8)(U), \ 5356 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ 5357 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5358 (__v8df)(__m512d)(B), \ 5359 (__v8di)(__m512i)(C), \ 5360 (int)(imm), (__mmask8)(U), \ 5361 _MM_FROUND_CUR_DIRECTION) 5363 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ 5364 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5365 (__v16sf)(__m512)(B), \ 5366 (__v16si)(__m512i)(C), (int)(imm), \ 5367 (__mmask16)-1, (int)(R)) 5369 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ 5370 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5371 (__v16sf)(__m512)(B), \ 5372 (__v16si)(__m512i)(C), (int)(imm), \ 5373 (__mmask16)(U), (int)(R)) 5375 #define _mm512_fixupimm_ps(A, B, C, imm) \ 5376 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5377 
(__v16sf)(__m512)(B), \ 5378 (__v16si)(__m512i)(C), (int)(imm), \ 5380 _MM_FROUND_CUR_DIRECTION) 5382 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ 5383 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5384 (__v16sf)(__m512)(B), \ 5385 (__v16si)(__m512i)(C), (int)(imm), \ 5387 _MM_FROUND_CUR_DIRECTION) 5389 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ 5390 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5391 (__v16sf)(__m512)(B), \ 5392 (__v16si)(__m512i)(C), \ 5393 (int)(imm), (__mmask16)(U), \ 5396 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ 5397 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5398 (__v16sf)(__m512)(B), \ 5399 (__v16si)(__m512i)(C), \ 5400 (int)(imm), (__mmask16)(U), \ 5401 _MM_FROUND_CUR_DIRECTION) 5403 #define _mm_fixupimm_round_sd(A, B, C, imm, R) \ 5404 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5405 (__v2df)(__m128d)(B), \ 5406 (__v2di)(__m128i)(C), (int)(imm), \ 5407 (__mmask8)-1, (int)(R)) 5409 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ 5410 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5411 (__v2df)(__m128d)(B), \ 5412 (__v2di)(__m128i)(C), (int)(imm), \ 5413 (__mmask8)(U), (int)(R)) 5415 #define _mm_fixupimm_sd(A, B, C, imm) \ 5416 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5417 (__v2df)(__m128d)(B), \ 5418 (__v2di)(__m128i)(C), (int)(imm), \ 5420 _MM_FROUND_CUR_DIRECTION) 5422 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ 5423 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5424 (__v2df)(__m128d)(B), \ 5425 (__v2di)(__m128i)(C), (int)(imm), \ 5427 _MM_FROUND_CUR_DIRECTION) 5429 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ 5430 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5431 (__v2df)(__m128d)(B), \ 5432 (__v2di)(__m128i)(C), (int)(imm), \ 5433 (__mmask8)(U), (int)(R)) 5435 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ 5436 
(__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5437 (__v2df)(__m128d)(B), \ 5438 (__v2di)(__m128i)(C), (int)(imm), \ 5440 _MM_FROUND_CUR_DIRECTION) 5442 #define _mm_fixupimm_round_ss(A, B, C, imm, R) \ 5443 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5444 (__v4sf)(__m128)(B), \ 5445 (__v4si)(__m128i)(C), (int)(imm), \ 5446 (__mmask8)-1, (int)(R)) 5448 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ 5449 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5450 (__v4sf)(__m128)(B), \ 5451 (__v4si)(__m128i)(C), (int)(imm), \ 5452 (__mmask8)(U), (int)(R)) 5454 #define _mm_fixupimm_ss(A, B, C, imm) \ 5455 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5456 (__v4sf)(__m128)(B), \ 5457 (__v4si)(__m128i)(C), (int)(imm), \ 5459 _MM_FROUND_CUR_DIRECTION) 5461 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ 5462 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5463 (__v4sf)(__m128)(B), \ 5464 (__v4si)(__m128i)(C), (int)(imm), \ 5466 _MM_FROUND_CUR_DIRECTION) 5468 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ 5469 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5470 (__v4sf)(__m128)(B), \ 5471 (__v4si)(__m128i)(C), (int)(imm), \ 5472 (__mmask8)(U), (int)(R)) 5474 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ 5475 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5476 (__v4sf)(__m128)(B), \ 5477 (__v4si)(__m128i)(C), (int)(imm), \ 5479 _MM_FROUND_CUR_DIRECTION) 5481 #define _mm_getexp_round_sd(A, B, R) \ 5482 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5483 (__v2df)(__m128d)(B), \ 5484 (__v2df)_mm_setzero_pd(), \ 5485 (__mmask8)-1, (int)(R)) 5491 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5498 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5505 #define _mm_mask_getexp_round_sd(W, U, A, B, R) \ 5506 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5507 (__v2df)(__m128d)(B), \ 5508 (__v2df)(__m128d)(W), \ 5509 (__mmask8)(U), (int)(R)) 5514 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5521 #define _mm_maskz_getexp_round_sd(U, A, B, R) \ 5522 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5523 (__v2df)(__m128d)(B), \ 5524 (__v2df)_mm_setzero_pd(), \ 5525 (__mmask8)(U), (int)(R)) 5527 #define _mm_getexp_round_ss(A, B, R) \ 5528 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5529 (__v4sf)(__m128)(B), \ 5530 (__v4sf)_mm_setzero_ps(), \ 5531 (__mmask8)-1, (int)(R)) 5536 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5543 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5550 #define _mm_mask_getexp_round_ss(W, U, A, B, R) \ 5551 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5552 (__v4sf)(__m128)(B), \ 5553 (__v4sf)(__m128)(W), \ 5554 (__mmask8)(U), (int)(R)) 5559 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5566 #define _mm_maskz_getexp_round_ss(U, A, B, R) \ 5567 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5568 (__v4sf)(__m128)(B), \ 5569 (__v4sf)_mm_setzero_ps(), \ 5570 (__mmask8)(U), (int)(R)) 5572 #define _mm_getmant_round_sd(A, B, C, D, R) \ 5573 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5574 (__v2df)(__m128d)(B), \ 5575 (int)(((D)<<2) | (C)), \ 5576 (__v2df)_mm_setzero_pd(), \ 5577 (__mmask8)-1, (int)(R)) 5579 #define _mm_getmant_sd(A, B, C, D) \ 5580 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5581 (__v2df)(__m128d)(B), \ 5582 (int)(((D)<<2) | (C)), \ 5583 (__v2df)_mm_setzero_pd(), \ 5585 _MM_FROUND_CUR_DIRECTION) 5587 #define _mm_mask_getmant_sd(W, U, A, B, C, D) \ 5588 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5589 (__v2df)(__m128d)(B), \ 5590 (int)(((D)<<2) | (C)), \ 5591 (__v2df)(__m128d)(W), \ 5593 _MM_FROUND_CUR_DIRECTION) 5595 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ 5596 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5597 (__v2df)(__m128d)(B), \ 5598 (int)(((D)<<2) | (C)), \ 5599 (__v2df)(__m128d)(W), \ 5600 (__mmask8)(U), (int)(R)) 5602 #define _mm_maskz_getmant_sd(U, A, B, C, D) \ 5603 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5604 (__v2df)(__m128d)(B), \ 5605 (int)(((D)<<2) | (C)), \ 5606 (__v2df)_mm_setzero_pd(), \ 5608 _MM_FROUND_CUR_DIRECTION) 5610 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ 5611 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5612 (__v2df)(__m128d)(B), \ 5613 (int)(((D)<<2) | (C)), \ 5614 (__v2df)_mm_setzero_pd(), \ 5615 (__mmask8)(U), (int)(R)) 5617 #define _mm_getmant_round_ss(A, B, C, D, R) \ 5618 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5619 (__v4sf)(__m128)(B), \ 5620 (int)(((D)<<2) | (C)), \ 5621 (__v4sf)_mm_setzero_ps(), \ 5622 (__mmask8)-1, (int)(R)) 5624 #define _mm_getmant_ss(A, B, C, D) \ 5625 
(__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5626 (__v4sf)(__m128)(B), \ 5627 (int)(((D)<<2) | (C)), \ 5628 (__v4sf)_mm_setzero_ps(), \ 5630 _MM_FROUND_CUR_DIRECTION) 5632 #define _mm_mask_getmant_ss(W, U, A, B, C, D) \ 5633 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5634 (__v4sf)(__m128)(B), \ 5635 (int)(((D)<<2) | (C)), \ 5636 (__v4sf)(__m128)(W), \ 5638 _MM_FROUND_CUR_DIRECTION) 5640 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ 5641 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5642 (__v4sf)(__m128)(B), \ 5643 (int)(((D)<<2) | (C)), \ 5644 (__v4sf)(__m128)(W), \ 5645 (__mmask8)(U), (int)(R)) 5647 #define _mm_maskz_getmant_ss(U, A, B, C, D) \ 5648 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5649 (__v4sf)(__m128)(B), \ 5650 (int)(((D)<<2) | (C)), \ 5651 (__v4sf)_mm_setzero_ps(), \ 5653 _MM_FROUND_CUR_DIRECTION) 5655 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ 5656 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5657 (__v4sf)(__m128)(B), \ 5658 (int)(((D)<<2) | (C)), \ 5659 (__v4sf)_mm_setzero_ps(), \ 5660 (__mmask8)(U), (int)(R)) 5668 #define _mm_comi_round_sd(A, B, P, R) \ 5669 (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 5672 #define _mm_comi_round_ss(A, B, P, R) \ 5673 (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 5677 #define _mm_cvt_roundsd_si64(A, R) \ 5678 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) 5684 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5690 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5698 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5706 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5712 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5720 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5728 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5734 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5742 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5750 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5756 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5764 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5772 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5778 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5786 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5794 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5800 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5808 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5816 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5822 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5830 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5838 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5844 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5852 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5860 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5866 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5874 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5882 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5888 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5896 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5904 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5910 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5918 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5926 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5932 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5940 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5945 #define _mm512_ternarylogic_epi32(A, B, C, imm) \ 5946 (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 5947 (__v16si)(__m512i)(B), \ 5948 (__v16si)(__m512i)(C), (int)(imm), \ 5951 #define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \ 5952 (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \ 5953 (__v16si)(__m512i)(B), \ 5954 (__v16si)(__m512i)(C), (int)(imm), \ 5957 #define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 5958 (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \ 5959 (__v16si)(__m512i)(B), \ 5960 (__v16si)(__m512i)(C), \ 5961 (int)(imm), (__mmask16)(U)) 5963 #define _mm512_ternarylogic_epi64(A, B, C, imm) \ 5964 (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 5965 (__v8di)(__m512i)(B), \ 5966 (__v8di)(__m512i)(C), (int)(imm), \ 5969 #define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \ 5970 (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \ 5971 (__v8di)(__m512i)(B), \ 5972 (__v8di)(__m512i)(C), (int)(imm), \ 5975 #define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 5976 (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \ 5977 (__v8di)(__m512i)(B), \ 5978 (__v8di)(__m512i)(C), (int)(imm), \ 5982 #define _mm_cvt_roundsd_i64(A, R) \ 5983 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) 5986 #define _mm_cvt_roundsd_si32(A, R) \ 5987 (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) 5989 #define _mm_cvt_roundsd_i32(A, R) \ 5990 (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)) 5992 #define _mm_cvt_roundsd_u32(A, R) \ 5993 (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)) 5998 return (
unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
6003 #define _mm_cvt_roundsd_u64(A, R) \ 6004 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ 6008 _mm_cvtsd_u64 (__m128d __A)
6010 return (
unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
6016 #define _mm_cvt_roundss_si32(A, R) \ 6017 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) 6019 #define _mm_cvt_roundss_i32(A, R) \ 6020 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) 6023 #define _mm_cvt_roundss_si64(A, R) \ 6024 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) 6026 #define _mm_cvt_roundss_i64(A, R) \ 6027 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) 6030 #define _mm_cvt_roundss_u32(A, R) \ 6031 (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)) 6036 return (
unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6041 #define _mm_cvt_roundss_u64(A, R) \ 6042 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ 6046 _mm_cvtss_u64 (__m128 __A)
6048 return (
unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6054 #define _mm_cvtt_roundsd_i32(A, R) \ 6055 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) 6057 #define _mm_cvtt_roundsd_si32(A, R) \ 6058 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) 6063 return (
int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6068 #define _mm_cvtt_roundsd_si64(A, R) \ 6069 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) 6071 #define _mm_cvtt_roundsd_i64(A, R) \ 6072 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) 6075 _mm_cvttsd_i64 (__m128d __A)
6077 return (
long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6082 #define _mm_cvtt_roundsd_u32(A, R) \ 6083 (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)) 6088 return (
unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6093 #define _mm_cvtt_roundsd_u64(A, R) \ 6094 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ 6098 _mm_cvttsd_u64 (__m128d __A)
6100 return (
unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6106 #define _mm_cvtt_roundss_i32(A, R) \ 6107 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) 6109 #define _mm_cvtt_roundss_si32(A, R) \ 6110 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) 6115 return (
int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6120 #define _mm_cvtt_roundss_i64(A, R) \ 6121 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) 6123 #define _mm_cvtt_roundss_si64(A, R) \ 6124 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) 6127 _mm_cvttss_i64 (__m128 __A)
6129 return (
long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6134 #define _mm_cvtt_roundss_u32(A, R) \ 6135 (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)) 6140 return (
unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6145 #define _mm_cvtt_roundss_u64(A, R) \ 6146 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ 6150 _mm_cvttss_u64 (__m128 __A)
6152 return (
unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
/*
 * In-lane element permutes with an 8-bit immediate C, lowered to the
 * vpermilpd512/vpermilps512 builtins.  The mask forms do not pass the mask
 * to the builtin; instead they wrap the unmasked macro in a
 * selectpd_512/selectps_512 blend, taking W (merge-masking) or
 * _mm512_setzero_pd()/_mm512_setzero_ps() (zero-masking) for lanes whose
 * mask bit is clear.
 * NOTE(review): the trailing "6187 return ..." statement appears to be the
 * body of the variable-control permute intrinsic (presumably
 * _mm512_permutevar_pd — its signature is on lines dropped from this
 * extraction); confirm against the full header before touching it.
 */
6158 #define _mm512_permute_pd(X, C) \ 6159 (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)) 6161 #define _mm512_mask_permute_pd(W, U, X, C) \ 6162 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6163 (__v8df)_mm512_permute_pd((X), (C)), \ 6164 (__v8df)(__m512d)(W)) 6166 #define _mm512_maskz_permute_pd(U, X, C) \ 6167 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6168 (__v8df)_mm512_permute_pd((X), (C)), \ 6169 (__v8df)_mm512_setzero_pd()) 6171 #define _mm512_permute_ps(X, C) \ 6172 (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)) 6174 #define _mm512_mask_permute_ps(W, U, X, C) \ 6175 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6176 (__v16sf)_mm512_permute_ps((X), (C)), \ 6177 (__v16sf)(__m512)(W)) 6179 #define _mm512_maskz_permute_ps(U, X, C) \ 6180 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6181 (__v16sf)_mm512_permute_ps((X), (C)), \ 6182 (__v16sf)_mm512_setzero_ps()) 6187 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6193 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6201 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6209 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6215 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6223 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6231 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6238 return (__m512d)__builtin_ia32_selectpd_512(__U,
6247 return (__m512d)__builtin_ia32_selectpd_512(__U,
6249 (__v8df)(__m512d)__I);
6256 return (__m512d)__builtin_ia32_selectpd_512(__U,
6264 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6271 return (__m512)__builtin_ia32_selectps_512(__U,
6279 return (__m512)__builtin_ia32_selectps_512(__U,
6281 (__v16sf)(__m512)__I);
6287 return (__m512)__builtin_ia32_selectps_512(__U,
/*
 * Truncating (round-toward-zero) conversion of 8 doubles to 8 unsigned
 * 32-bit integers, with an explicit rounding/SAE operand R passed through
 * to the cvttpd2udq512 masked builtin.  Unlike the permute macros above,
 * masking here is done by the builtin itself: the third argument is the
 * pass-through source — _mm256_undefined_si256() with an all-ones mask for
 * the unmasked form, W for merge-masking, _mm256_setzero_si256() for
 * zero-masking.
 * NOTE(review): the trailing "6311 return ..." is the truncated start of a
 * function body (signature on dropped lines) — leave it alone here.
 */
6293 #define _mm512_cvtt_roundpd_epu32(A, R) \ 6294 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6295 (__v8si)_mm256_undefined_si256(), \ 6296 (__mmask8)-1, (int)(R)) 6298 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \ 6299 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6300 (__v8si)(__m256i)(W), \ 6301 (__mmask8)(U), (int)(R)) 6303 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \ 6304 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6305 (__v8si)_mm256_setzero_si256(), \ 6306 (__mmask8)(U), (int)(R)) 6311 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6321 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6330 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6337 #define _mm_roundscale_round_sd(A, B, imm, R) \ 6338 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6339 (__v2df)(__m128d)(B), \ 6340 (__v2df)_mm_setzero_pd(), \ 6341 (__mmask8)-1, (int)(imm), \ 6344 #define _mm_roundscale_sd(A, B, imm) \ 6345 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6346 (__v2df)(__m128d)(B), \ 6347 (__v2df)_mm_setzero_pd(), \ 6348 (__mmask8)-1, (int)(imm), \ 6349 _MM_FROUND_CUR_DIRECTION) 6351 #define _mm_mask_roundscale_sd(W, U, A, B, imm) \ 6352 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6353 (__v2df)(__m128d)(B), \ 6354 (__v2df)(__m128d)(W), \ 6355 (__mmask8)(U), (int)(imm), \ 6356 _MM_FROUND_CUR_DIRECTION) 6358 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ 6359 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6360 (__v2df)(__m128d)(B), \ 6361 (__v2df)(__m128d)(W), \ 6362 (__mmask8)(U), (int)(I), \ 6365 #define _mm_maskz_roundscale_sd(U, A, B, I) \ 6366 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6367 (__v2df)(__m128d)(B), \ 6368 (__v2df)_mm_setzero_pd(), \ 6369 (__mmask8)(U), (int)(I), \ 6370 _MM_FROUND_CUR_DIRECTION) 6372 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ 6373 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6374 (__v2df)(__m128d)(B), \ 6375 (__v2df)_mm_setzero_pd(), \ 6376 (__mmask8)(U), (int)(I), \ 6379 #define _mm_roundscale_round_ss(A, B, imm, R) \ 6380 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6381 (__v4sf)(__m128)(B), \ 6382 (__v4sf)_mm_setzero_ps(), \ 6383 (__mmask8)-1, (int)(imm), \ 6386 #define _mm_roundscale_ss(A, B, imm) \ 6387 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6388 (__v4sf)(__m128)(B), \ 6389 (__v4sf)_mm_setzero_ps(), \ 6390 (__mmask8)-1, (int)(imm), \ 6391 _MM_FROUND_CUR_DIRECTION) 6393 #define _mm_mask_roundscale_ss(W, U, A, B, I) \ 6394 
(__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6395 (__v4sf)(__m128)(B), \ 6396 (__v4sf)(__m128)(W), \ 6397 (__mmask8)(U), (int)(I), \ 6398 _MM_FROUND_CUR_DIRECTION) 6400 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ 6401 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6402 (__v4sf)(__m128)(B), \ 6403 (__v4sf)(__m128)(W), \ 6404 (__mmask8)(U), (int)(I), \ 6407 #define _mm_maskz_roundscale_ss(U, A, B, I) \ 6408 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6409 (__v4sf)(__m128)(B), \ 6410 (__v4sf)_mm_setzero_ps(), \ 6411 (__mmask8)(U), (int)(I), \ 6412 _MM_FROUND_CUR_DIRECTION) 6414 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ 6415 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6416 (__v4sf)(__m128)(B), \ 6417 (__v4sf)_mm_setzero_ps(), \ 6418 (__mmask8)(U), (int)(I), \ 6421 #define _mm512_scalef_round_pd(A, B, R) \ 6422 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6423 (__v8df)(__m512d)(B), \ 6424 (__v8df)_mm512_undefined_pd(), \ 6425 (__mmask8)-1, (int)(R)) 6427 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ 6428 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6429 (__v8df)(__m512d)(B), \ 6430 (__v8df)(__m512d)(W), \ 6431 (__mmask8)(U), (int)(R)) 6433 #define _mm512_maskz_scalef_round_pd(U, A, B, R) \ 6434 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6435 (__v8df)(__m512d)(B), \ 6436 (__v8df)_mm512_setzero_pd(), \ 6437 (__mmask8)(U), (int)(R)) 6442 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6453 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6463 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6471 #define _mm512_scalef_round_ps(A, B, R) \ 6472 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6473 (__v16sf)(__m512)(B), \ 6474 (__v16sf)_mm512_undefined_ps(), \ 6475 (__mmask16)-1, (int)(R)) 6477 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ 6478 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6479 (__v16sf)(__m512)(B), \ 6480 (__v16sf)(__m512)(W), \ 6481 (__mmask16)(U), (int)(R)) 6483 #define _mm512_maskz_scalef_round_ps(U, A, B, R) \ 6484 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6485 (__v16sf)(__m512)(B), \ 6486 (__v16sf)_mm512_setzero_ps(), \ 6487 (__mmask16)(U), (int)(R)) 6492 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6503 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6513 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6521 #define _mm_scalef_round_sd(A, B, R) \ 6522 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6523 (__v2df)(__m128d)(B), \ 6524 (__v2df)_mm_setzero_pd(), \ 6525 (__mmask8)-1, (int)(R)) 6530 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6539 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6546 #define _mm_mask_scalef_round_sd(W, U, A, B, R) \ 6547 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6548 (__v2df)(__m128d)(B), \ 6549 (__v2df)(__m128d)(W), \ 6550 (__mmask8)(U), (int)(R)) 6555 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6562 #define _mm_maskz_scalef_round_sd(U, A, B, R) \ 6563 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6564 (__v2df)(__m128d)(B), \ 6565 (__v2df)_mm_setzero_pd(), \ 6566 (__mmask8)(U), (int)(R)) 6568 #define _mm_scalef_round_ss(A, B, R) \ 6569 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6570 (__v4sf)(__m128)(B), \ 6571 (__v4sf)_mm_setzero_ps(), \ 6572 (__mmask8)-1, (int)(R)) 6577 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6586 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6593 #define _mm_mask_scalef_round_ss(W, U, A, B, R) \ 6594 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6595 (__v4sf)(__m128)(B), \ 6596 (__v4sf)(__m128)(W), \ 6597 (__mmask8)(U), (int)(R)) 6602 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6609 #define _mm_maskz_scalef_round_ss(U, A, B, R) \ 6610 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6611 (__v4sf)(__m128)(B), \ 6612 (__v4sf)_mm_setzero_ps(), \ 6619 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6625 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6632 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6640 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6646 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6654 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
/*
 * 128-bit-lane shuffles: shuffle_{f32x4,f64x2,i32x4,i64x2} select whole
 * 128-bit lanes from A and B per the immediate, via the shuf_* builtins;
 * shuffle_pd/shuffle_ps are the classic per-element shufpd/shufps forms
 * widened to 512 bits.  All mask/maskz variants follow the header's
 * select-blend pattern: compute the unmasked result, then blend against W
 * (merge) or the matching _mm512_setzero_* (zero) with selectd/q/pd/ps_512.
 * _mm_sqrt_round_sd at the end instead passes its all-ones mask and a
 * _mm_setzero_pd() pass-through directly to the sqrtsd_round_mask builtin,
 * along with the explicit rounding operand R.
 * NOTE(review): the trailing "6752 return ..." is the truncated start of a
 * function body whose signature is missing from this extraction.
 */
6659 #define _mm512_shuffle_f32x4(A, B, imm) \ 6660 (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ 6661 (__v16sf)(__m512)(B), (int)(imm)) 6663 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ 6664 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6665 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6666 (__v16sf)(__m512)(W)) 6668 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ 6669 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6670 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6671 (__v16sf)_mm512_setzero_ps()) 6673 #define _mm512_shuffle_f64x2(A, B, imm) \ 6674 (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ 6675 (__v8df)(__m512d)(B), (int)(imm)) 6677 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ 6678 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6679 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6680 (__v8df)(__m512d)(W)) 6682 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ 6683 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6684 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6685 (__v8df)_mm512_setzero_pd()) 6687 #define _mm512_shuffle_i32x4(A, B, imm) \ 6688 (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ 6689 (__v16si)(__m512i)(B), (int)(imm)) 6691 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ 6692 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 6693 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6694 (__v16si)(__m512i)(W)) 6696 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ 6697 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 6698 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6699 (__v16si)_mm512_setzero_si512()) 6701 #define _mm512_shuffle_i64x2(A, B, imm) \ 6702 (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ 6703 (__v8di)(__m512i)(B), (int)(imm)) 6705 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ 6706 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 6707 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6708 
(__v8di)(__m512i)(W)) 6710 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ 6711 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 6712 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6713 (__v8di)_mm512_setzero_si512()) 6715 #define _mm512_shuffle_pd(A, B, M) \ 6716 (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ 6717 (__v8df)(__m512d)(B), (int)(M)) 6719 #define _mm512_mask_shuffle_pd(W, U, A, B, M) \ 6720 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6721 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6722 (__v8df)(__m512d)(W)) 6724 #define _mm512_maskz_shuffle_pd(U, A, B, M) \ 6725 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6726 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6727 (__v8df)_mm512_setzero_pd()) 6729 #define _mm512_shuffle_ps(A, B, M) \ 6730 (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ 6731 (__v16sf)(__m512)(B), (int)(M)) 6733 #define _mm512_mask_shuffle_ps(W, U, A, B, M) \ 6734 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6735 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6736 (__v16sf)(__m512)(W)) 6738 #define _mm512_maskz_shuffle_ps(U, A, B, M) \ 6739 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6740 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6741 (__v16sf)_mm512_setzero_ps()) 6743 #define _mm_sqrt_round_sd(A, B, R) \ 6744 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6745 (__v2df)(__m128d)(B), \ 6746 (__v2df)_mm_setzero_pd(), \ 6747 (__mmask8)-1, (int)(R)) 6752 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6759 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ 6760 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6761 (__v2df)(__m128d)(B), \ 6762 (__v2df)(__m128d)(W), \ 6763 (__mmask8)(U), (int)(R)) 6768 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6775 #define _mm_maskz_sqrt_round_sd(U, A, B, R) \ 6776 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6777 (__v2df)(__m128d)(B), \ 6778 (__v2df)_mm_setzero_pd(), \ 6779 (__mmask8)(U), (int)(R)) 6781 #define _mm_sqrt_round_ss(A, B, R) \ 6782 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6783 (__v4sf)(__m128)(B), \ 6784 (__v4sf)_mm_setzero_ps(), \ 6785 (__mmask8)-1, (int)(R)) 6790 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6797 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ 6798 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6799 (__v4sf)(__m128)(B), \ 6800 (__v4sf)(__m128)(W), (__mmask8)(U), \ 6806 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6813 #define _mm_maskz_sqrt_round_ss(U, A, B, R) \ 6814 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6815 (__v4sf)(__m128)(B), \ 6816 (__v4sf)_mm_setzero_ps(), \ 6817 (__mmask8)(U), (int)(R)) 6822 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6823 0, 1, 2, 3, 0, 1, 2, 3,
6824 0, 1, 2, 3, 0, 1, 2, 3);
6830 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6838 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6846 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6847 0, 1, 2, 3, 0, 1, 2, 3);
6853 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6861 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6869 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6870 0, 1, 2, 3, 0, 1, 2, 3,
6871 0, 1, 2, 3, 0, 1, 2, 3);
6877 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6885 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6893 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6894 0, 1, 2, 3, 0, 1, 2, 3);
6900 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6908 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6916 return (__m512d)__builtin_ia32_selectpd_512(__M,
6924 return (__m512d)__builtin_ia32_selectpd_512(__M,
6932 return (__m512)__builtin_ia32_selectps_512(__M,
6940 return (__m512)__builtin_ia32_selectps_512(__M,
6948 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6956 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6957 (__v16qi) __O, __M);
6963 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6971 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6977 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6985 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6986 (__v16hi) __O, __M);
6992 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
7000 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7006 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7014 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7015 (__v16qi) __O, __M);
7021 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7029 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7035 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7043 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7050 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7058 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7064 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7072 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7079 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7087 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7093 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7101 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7109 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7117 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7123 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7131 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7139 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7147 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7153 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7161 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7169 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7177 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7183 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7191 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7198 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7206 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7212 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7220 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7227 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7235 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7241 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7249 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7250 (__v16qi) __O, __M);
7256 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7264 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7270 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7278 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7279 (__v16hi) __O, __M);
7285 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7293 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7299 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7307 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7308 (__v16qi) __O, __M);
7314 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7322 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7328 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7336 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7343 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7351 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7357 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7365 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7372 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7380 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7383 #define _mm512_extracti32x4_epi32(A, imm) \ 7384 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7385 (__v4si)_mm_undefined_si128(), \ 7388 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ 7389 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7390 (__v4si)(__m128i)(W), \ 7393 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ 7394 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7395 (__v4si)_mm_setzero_si128(), \ 7398 #define _mm512_extracti64x4_epi64(A, imm) \ 7399 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7400 (__v4di)_mm256_undefined_si256(), \ 7403 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ 7404 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7405 (__v4di)(__m256i)(W), \ 7408 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) \ 7409 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7410 (__v4di)_mm256_setzero_si256(), \ 7413 #define _mm512_insertf64x4(A, B, imm) \ 7414 (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ 7415 (__v4df)(__m256d)(B), (int)(imm)) 7417 #define _mm512_mask_insertf64x4(W, U, A, B, imm) \ 7418 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7419 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7420 (__v8df)(__m512d)(W)) 7422 #define _mm512_maskz_insertf64x4(U, A, B, imm) \ 7423 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7424 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7425 (__v8df)_mm512_setzero_pd()) 7427 #define _mm512_inserti64x4(A, B, imm) \ 7428 (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ 7429 (__v4di)(__m256i)(B), (int)(imm)) 7431 #define _mm512_mask_inserti64x4(W, U, A, B, imm) \ 7432 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7433 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7434 (__v8di)(__m512i)(W)) 7436 #define _mm512_maskz_inserti64x4(U, A, B, imm) \ 7437 
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7438 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7439 (__v8di)_mm512_setzero_si512()) 7441 #define _mm512_insertf32x4(A, B, imm) \ 7442 (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ 7443 (__v4sf)(__m128)(B), (int)(imm)) 7445 #define _mm512_mask_insertf32x4(W, U, A, B, imm) \ 7446 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7447 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7448 (__v16sf)(__m512)(W)) 7450 #define _mm512_maskz_insertf32x4(U, A, B, imm) \ 7451 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7452 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7453 (__v16sf)_mm512_setzero_ps()) 7455 #define _mm512_inserti32x4(A, B, imm) \ 7456 (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ 7457 (__v4si)(__m128i)(B), (int)(imm)) 7459 #define _mm512_mask_inserti32x4(W, U, A, B, imm) \ 7460 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7461 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7462 (__v16si)(__m512i)(W)) 7464 #define _mm512_maskz_inserti32x4(U, A, B, imm) \ 7465 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7466 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7467 (__v16si)_mm512_setzero_si512()) 7469 #define _mm512_getmant_round_pd(A, B, C, R) \ 7470 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7471 (int)(((C)<<2) | (B)), \ 7472 (__v8df)_mm512_undefined_pd(), \ 7473 (__mmask8)-1, (int)(R)) 7475 #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ 7476 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7477 (int)(((C)<<2) | (B)), \ 7478 (__v8df)(__m512d)(W), \ 7479 (__mmask8)(U), (int)(R)) 7481 #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ 7482 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7483 (int)(((C)<<2) | (B)), \ 7484 (__v8df)_mm512_setzero_pd(), \ 7485 (__mmask8)(U), (int)(R)) 7487 #define _mm512_getmant_pd(A, B, C) \ 7488 
(__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7489 (int)(((C)<<2) | (B)), \ 7490 (__v8df)_mm512_setzero_pd(), \ 7492 _MM_FROUND_CUR_DIRECTION) 7494 #define _mm512_mask_getmant_pd(W, U, A, B, C) \ 7495 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7496 (int)(((C)<<2) | (B)), \ 7497 (__v8df)(__m512d)(W), \ 7499 _MM_FROUND_CUR_DIRECTION) 7501 #define _mm512_maskz_getmant_pd(U, A, B, C) \ 7502 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7503 (int)(((C)<<2) | (B)), \ 7504 (__v8df)_mm512_setzero_pd(), \ 7506 _MM_FROUND_CUR_DIRECTION) 7508 #define _mm512_getmant_round_ps(A, B, C, R) \ 7509 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7510 (int)(((C)<<2) | (B)), \ 7511 (__v16sf)_mm512_undefined_ps(), \ 7512 (__mmask16)-1, (int)(R)) 7514 #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ 7515 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7516 (int)(((C)<<2) | (B)), \ 7517 (__v16sf)(__m512)(W), \ 7518 (__mmask16)(U), (int)(R)) 7520 #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ 7521 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7522 (int)(((C)<<2) | (B)), \ 7523 (__v16sf)_mm512_setzero_ps(), \ 7524 (__mmask16)(U), (int)(R)) 7526 #define _mm512_getmant_ps(A, B, C) \ 7527 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7528 (int)(((C)<<2)|(B)), \ 7529 (__v16sf)_mm512_undefined_ps(), \ 7531 _MM_FROUND_CUR_DIRECTION) 7533 #define _mm512_mask_getmant_ps(W, U, A, B, C) \ 7534 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7535 (int)(((C)<<2)|(B)), \ 7536 (__v16sf)(__m512)(W), \ 7538 _MM_FROUND_CUR_DIRECTION) 7540 #define _mm512_maskz_getmant_ps(U, A, B, C) \ 7541 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7542 (int)(((C)<<2)|(B)), \ 7543 (__v16sf)_mm512_setzero_ps(), \ 7545 _MM_FROUND_CUR_DIRECTION) 7547 #define _mm512_getexp_round_pd(A, R) \ 7548 
(__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7549 (__v8df)_mm512_undefined_pd(), \ 7550 (__mmask8)-1, (int)(R)) 7552 #define _mm512_mask_getexp_round_pd(W, U, A, R) \ 7553 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7554 (__v8df)(__m512d)(W), \ 7555 (__mmask8)(U), (int)(R)) 7557 #define _mm512_maskz_getexp_round_pd(U, A, R) \ 7558 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7559 (__v8df)_mm512_setzero_pd(), \ 7560 (__mmask8)(U), (int)(R)) 7565 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7574 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7583 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7589 #define _mm512_getexp_round_ps(A, R) \ 7590 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7591 (__v16sf)_mm512_undefined_ps(), \ 7592 (__mmask16)-1, (int)(R)) 7594 #define _mm512_mask_getexp_round_ps(W, U, A, R) \ 7595 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7596 (__v16sf)(__m512)(W), \ 7597 (__mmask16)(U), (int)(R)) 7599 #define _mm512_maskz_getexp_round_ps(U, A, R) \ 7600 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7601 (__v16sf)_mm512_setzero_ps(), \ 7602 (__mmask16)(U), (int)(R)) 7607 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7616 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7625 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7631 #define _mm512_i64gather_ps(index, addr, scale) \ 7632 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 7633 (void const *)(addr), \ 7634 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7637 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7638 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ 7639 (void const *)(addr), \ 7640 (__v8di)(__m512i)(index), \ 7641 (__mmask8)(mask), (int)(scale)) 7643 #define _mm512_i64gather_epi32(index, addr, scale) \ 7644 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ 7645 (void const *)(addr), \ 7646 (__v8di)(__m512i)(index), \ 7647 (__mmask8)-1, (int)(scale)) 7649 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 7650 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 7651 (void const *)(addr), \ 7652 (__v8di)(__m512i)(index), \ 7653 (__mmask8)(mask), (int)(scale)) 7655 #define _mm512_i64gather_pd(index, addr, scale) \ 7656 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 7657 (void const *)(addr), \ 7658 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7661 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7662 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 7663 (void const *)(addr), \ 7664 (__v8di)(__m512i)(index), \ 7665 (__mmask8)(mask), (int)(scale)) 7667 #define _mm512_i64gather_epi64(index, addr, scale) \ 7668 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ 7669 (void const *)(addr), \ 7670 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7673 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7674 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 7675 (void const *)(addr), \ 7676 (__v8di)(__m512i)(index), \ 7677 (__mmask8)(mask), (int)(scale)) 7679 #define _mm512_i32gather_ps(index, addr, scale) \ 7680 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 7681 (void const *)(addr), \ 
7682 (__v16sf)(__m512)(index), \ 7683 (__mmask16)-1, (int)(scale)) 7685 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ 7686 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 7687 (void const *)(addr), \ 7688 (__v16sf)(__m512)(index), \ 7689 (__mmask16)(mask), (int)(scale)) 7691 #define _mm512_i32gather_epi32(index, addr, scale) \ 7692 (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 7693 (void const *)(addr), \ 7694 (__v16si)(__m512i)(index), \ 7695 (__mmask16)-1, (int)(scale)) 7697 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 7698 (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 7699 (void const *)(addr), \ 7700 (__v16si)(__m512i)(index), \ 7701 (__mmask16)(mask), (int)(scale)) 7703 #define _mm512_i32gather_pd(index, addr, scale) \ 7704 (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 7705 (void const *)(addr), \ 7706 (__v8si)(__m256i)(index), (__mmask8)-1, \ 7709 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \ 7710 (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 7711 (void const *)(addr), \ 7712 (__v8si)(__m256i)(index), \ 7713 (__mmask8)(mask), (int)(scale)) 7715 #define _mm512_i32gather_epi64(index, addr, scale) \ 7716 (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 7717 (void const *)(addr), \ 7718 (__v8si)(__m256i)(index), (__mmask8)-1, \ 7721 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 7722 (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 7723 (void const *)(addr), \ 7724 (__v8si)(__m256i)(index), \ 7725 (__mmask8)(mask), (int)(scale)) 7727 #define _mm512_i64scatter_ps(addr, index, v1, scale) \ 7728 __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \ 7729 (__v8di)(__m512i)(index), \ 7730 (__v8sf)(__m256)(v1), (int)(scale)) 7732 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 7733 
__builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \ 7734 (__v8di)(__m512i)(index), \ 7735 (__v8sf)(__m256)(v1), (int)(scale)) 7737 #define _mm512_i64scatter_epi32(addr, index, v1, scale) \ 7738 __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \ 7739 (__v8di)(__m512i)(index), \ 7740 (__v8si)(__m256i)(v1), (int)(scale)) 7742 #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 7743 __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \ 7744 (__v8di)(__m512i)(index), \ 7745 (__v8si)(__m256i)(v1), (int)(scale)) 7747 #define _mm512_i64scatter_pd(addr, index, v1, scale) \ 7748 __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \ 7749 (__v8di)(__m512i)(index), \ 7750 (__v8df)(__m512d)(v1), (int)(scale)) 7752 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 7753 __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \ 7754 (__v8di)(__m512i)(index), \ 7755 (__v8df)(__m512d)(v1), (int)(scale)) 7757 #define _mm512_i64scatter_epi64(addr, index, v1, scale) \ 7758 __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \ 7759 (__v8di)(__m512i)(index), \ 7760 (__v8di)(__m512i)(v1), (int)(scale)) 7762 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 7763 __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \ 7764 (__v8di)(__m512i)(index), \ 7765 (__v8di)(__m512i)(v1), (int)(scale)) 7767 #define _mm512_i32scatter_ps(addr, index, v1, scale) \ 7768 __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \ 7769 (__v16si)(__m512i)(index), \ 7770 (__v16sf)(__m512)(v1), (int)(scale)) 7772 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 7773 __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \ 7774 (__v16si)(__m512i)(index), \ 7775 (__v16sf)(__m512)(v1), (int)(scale)) 7777 #define _mm512_i32scatter_epi32(addr, index, v1, scale) \ 7778 __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \ 7779 (__v16si)(__m512i)(index), \ 7780 
(__v16si)(__m512i)(v1), (int)(scale)) 7782 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 7783 __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \ 7784 (__v16si)(__m512i)(index), \ 7785 (__v16si)(__m512i)(v1), (int)(scale)) 7787 #define _mm512_i32scatter_pd(addr, index, v1, scale) \ 7788 __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \ 7789 (__v8si)(__m256i)(index), \ 7790 (__v8df)(__m512d)(v1), (int)(scale)) 7792 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 7793 __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \ 7794 (__v8si)(__m256i)(index), \ 7795 (__v8df)(__m512d)(v1), (int)(scale)) 7797 #define _mm512_i32scatter_epi64(addr, index, v1, scale) \ 7798 __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \ 7799 (__v8si)(__m256i)(index), \ 7800 (__v8di)(__m512i)(v1), (int)(scale)) 7802 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 7803 __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \ 7804 (__v8si)(__m256i)(index), \ 7805 (__v8di)(__m512i)(v1), (int)(scale)) 7810 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7817 #define _mm_fmadd_round_ss(A, B, C, R) \ 7818 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7819 (__v4sf)(__m128)(B), \ 7820 (__v4sf)(__m128)(C), (__mmask8)-1, \ 7823 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ 7824 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7825 (__v4sf)(__m128)(A), \ 7826 (__v4sf)(__m128)(B), (__mmask8)(U), \ 7832 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7839 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ 7840 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7841 (__v4sf)(__m128)(B), \ 7842 (__v4sf)(__m128)(C), (__mmask8)(U), \ 7848 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7855 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ 7856 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 7857 (__v4sf)(__m128)(X), \ 7858 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7864 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7871 #define _mm_fmsub_round_ss(A, B, C, R) \ 7872 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7873 (__v4sf)(__m128)(B), \ 7874 -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7877 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \ 7878 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7879 (__v4sf)(__m128)(A), \ 7880 -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7886 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7893 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ 7894 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7895 (__v4sf)(__m128)(B), \ 7896 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 7902 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7909 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ 7910 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 7911 (__v4sf)(__m128)(X), \ 7912 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7918 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7925 #define _mm_fnmadd_round_ss(A, B, C, R) \ 7926 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7927 -(__v4sf)(__m128)(B), \ 7928 (__v4sf)(__m128)(C), (__mmask8)-1, \ 7931 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ 7932 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7933 -(__v4sf)(__m128)(A), \ 7934 (__v4sf)(__m128)(B), (__mmask8)(U), \ 7940 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7947 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ 7948 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7949 -(__v4sf)(__m128)(B), \ 7950 (__v4sf)(__m128)(C), (__mmask8)(U), \ 7956 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7963 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ 7964 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 7965 -(__v4sf)(__m128)(X), \ 7966 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7972 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7979 #define _mm_fnmsub_round_ss(A, B, C, R) \ 7980 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7981 -(__v4sf)(__m128)(B), \ 7982 -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7985 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ 7986 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7987 -(__v4sf)(__m128)(A), \ 7988 -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7994 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
8001 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ 8002 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 8003 -(__v4sf)(__m128)(B), \ 8004 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 8010 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
8017 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ 8018 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 8019 -(__v4sf)(__m128)(X), \ 8020 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 8026 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8033 #define _mm_fmadd_round_sd(A, B, C, R) \ 8034 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8035 (__v2df)(__m128d)(B), \ 8036 (__v2df)(__m128d)(C), (__mmask8)-1, \ 8039 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ 8040 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8041 (__v2df)(__m128d)(A), \ 8042 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8048 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8055 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ 8056 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8057 (__v2df)(__m128d)(B), \ 8058 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8064 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8071 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ 8072 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8073 (__v2df)(__m128d)(X), \ 8074 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8080 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8087 #define _mm_fmsub_round_sd(A, B, C, R) \ 8088 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8089 (__v2df)(__m128d)(B), \ 8090 -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8093 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \ 8094 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8095 (__v2df)(__m128d)(A), \ 8096 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8102 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8109 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ 8110 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8111 (__v2df)(__m128d)(B), \ 8112 -(__v2df)(__m128d)(C), \ 8113 (__mmask8)(U), (int)(R)) 8118 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8125 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ 8126 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8127 (__v2df)(__m128d)(X), \ 8128 (__v2df)(__m128d)(Y), \ 8129 (__mmask8)(U), (int)(R)) 8134 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8141 #define _mm_fnmadd_round_sd(A, B, C, R) \ 8142 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8143 -(__v2df)(__m128d)(B), \ 8144 (__v2df)(__m128d)(C), (__mmask8)-1, \ 8147 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ 8148 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8149 -(__v2df)(__m128d)(A), \ 8150 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8156 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8163 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ 8164 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8165 -(__v2df)(__m128d)(B), \ 8166 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8172 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8179 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ 8180 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8181 -(__v2df)(__m128d)(X), \ 8182 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8188 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8195 #define _mm_fnmsub_round_sd(A, B, C, R) \ 8196 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8197 -(__v2df)(__m128d)(B), \ 8198 -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8201 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ 8202 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8203 -(__v2df)(__m128d)(A), \ 8204 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8210 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8217 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ 8218 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8219 -(__v2df)(__m128d)(B), \ 8220 -(__v2df)(__m128d)(C), \ 8227 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8234 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ 8235 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8236 -(__v2df)(__m128d)(X), \ 8237 (__v2df)(__m128d)(Y), \ 8238 (__mmask8)(U), (int)(R)) 8240 #define _mm512_permutex_pd(X, C) \ 8241 (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)) 8243 #define _mm512_mask_permutex_pd(W, U, X, C) \ 8244 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8245 (__v8df)_mm512_permutex_pd((X), (C)), \ 8246 (__v8df)(__m512d)(W)) 8248 #define _mm512_maskz_permutex_pd(U, X, C) \ 8249 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8250 (__v8df)_mm512_permutex_pd((X), (C)), \ 8251 (__v8df)_mm512_setzero_pd()) 8253 #define _mm512_permutex_epi64(X, C) \ 8254 (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)) 8256 #define _mm512_mask_permutex_epi64(W, U, X, C) \ 8257 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8258 (__v8di)_mm512_permutex_epi64((X), (C)), \ 8259 (__v8di)(__m512i)(W)) 8261 #define _mm512_maskz_permutex_epi64(U, X, C) \ 8262 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8263 (__v8di)_mm512_permutex_epi64((X), (C)), \ 8264 (__v8di)_mm512_setzero_si512()) 8269 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8275 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8283 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8291 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8297 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8306 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8314 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8320 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8328 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8336 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8339 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 8344 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8353 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8358 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 8363 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8369 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8375 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8381 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8387 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8393 return (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8399 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8404 *__C = (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8405 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8411 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8417 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8423 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8426 #define _kand_mask16 _mm512_kand 8427 #define _kandn_mask16 _mm512_kandn 8428 #define _knot_mask16 _mm512_knot 8429 #define _kor_mask16 _mm512_kor 8430 #define _kxnor_mask16 _mm512_kxnor 8431 #define _kxor_mask16 _mm512_kxor 8433 #define _kshiftli_mask16(A, I) \ 8434 (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)) 8436 #define _kshiftri_mask16(A, I) \ 8437 (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)) 8441 return (
unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8446 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8451 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8456 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8463 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8470 return (__m512i) __builtin_nontemporal_load((
const __v8di_aligned *)__P);
8477 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8483 typedef __v16sf __v16sf_aligned
__attribute__((aligned(64)));
8484 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8490 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8498 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8507 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8515 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8524 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8532 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8541 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8549 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8555 #define _mm_cmp_round_ss_mask(X, Y, P, R) \ 8556 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8557 (__v4sf)(__m128)(Y), (int)(P), \ 8558 (__mmask8)-1, (int)(R)) 8560 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ 8561 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8562 (__v4sf)(__m128)(Y), (int)(P), \ 8563 (__mmask8)(M), (int)(R)) 8565 #define _mm_cmp_ss_mask(X, Y, P) \ 8566 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8567 (__v4sf)(__m128)(Y), (int)(P), \ 8569 _MM_FROUND_CUR_DIRECTION) 8571 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \ 8572 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8573 (__v4sf)(__m128)(Y), (int)(P), \ 8575 _MM_FROUND_CUR_DIRECTION) 8577 #define _mm_cmp_round_sd_mask(X, Y, P, R) \ 8578 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8579 (__v2df)(__m128d)(Y), (int)(P), \ 8580 (__mmask8)-1, (int)(R)) 8582 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 8583 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8584 (__v2df)(__m128d)(Y), (int)(P), \ 8585 (__mmask8)(M), (int)(R)) 8587 #define _mm_cmp_sd_mask(X, Y, P) \ 8588 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8589 (__v2df)(__m128d)(Y), (int)(P), \ 8591 _MM_FROUND_CUR_DIRECTION) 8593 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ 8594 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8595 (__v2df)(__m128d)(Y), (int)(P), \ 8597 _MM_FROUND_CUR_DIRECTION) 8660 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8661 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8667 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8675 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8683 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8684 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8690 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8698 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8706 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), __W);
8712 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B),
8719 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), __W);
8725 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B),
8732 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8738 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8744 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8748 return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
8754 return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
8762 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8766 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
8772 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
8777 #define _mm512_shuffle_epi32(A, I) \ 8778 (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)) 8780 #define _mm512_mask_shuffle_epi32(W, U, A, I) \ 8781 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 8782 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8783 (__v16si)(__m512i)(W)) 8785 #define _mm512_maskz_shuffle_epi32(U, A, I) \ 8786 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 8787 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8788 (__v16si)_mm512_setzero_si512()) 8793 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8801 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8809 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8817 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8825 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)__P,
8833 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)__P,
8841 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)__P,
8849 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)__P,
8857 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)__P,
8865 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)__P,
8873 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)__P,
8881 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)__P,
8889 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8897 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8905 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8913 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8918 #define _mm512_cvt_roundps_pd(A, R) \ 8919 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8920 (__v8df)_mm512_undefined_pd(), \ 8921 (__mmask8)-1, (int)(R)) 8923 #define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ 8924 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8925 (__v8df)(__m512d)(W), \ 8926 (__mmask8)(U), (int)(R)) 8928 #define _mm512_maskz_cvt_roundps_pd(U, A, R) \ 8929 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8930 (__v8df)_mm512_setzero_pd(), \ 8931 (__mmask8)(U), (int)(R)) 8936 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8942 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8950 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8970 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8978 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8986 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8994 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
9002 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
9009 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9016 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9023 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9027 #define _mm_cvt_roundsd_ss(A, B, R) \ 9028 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9029 (__v2df)(__m128d)(B), \ 9030 (__v4sf)_mm_undefined_ps(), \ 9031 (__mmask8)-1, (int)(R)) 9033 #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \ 9034 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9035 (__v2df)(__m128d)(B), \ 9036 (__v4sf)(__m128)(W), \ 9037 (__mmask8)(U), (int)(R)) 9039 #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \ 9040 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 9041 (__v2df)(__m128d)(B), \ 9042 (__v4sf)_mm_setzero_ps(), \ 9043 (__mmask8)(U), (int)(R)) 9048 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9057 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9063 #define _mm_cvtss_i32 _mm_cvtss_si32 9064 #define _mm_cvtsd_i32 _mm_cvtsd_si32 9065 #define _mm_cvti32_sd _mm_cvtsi32_sd 9066 #define _mm_cvti32_ss _mm_cvtsi32_ss 9068 #define _mm_cvtss_i64 _mm_cvtss_si64 9069 #define _mm_cvtsd_i64 _mm_cvtsd_si64 9070 #define _mm_cvti64_sd _mm_cvtsi64_sd 9071 #define _mm_cvti64_ss _mm_cvtsi64_ss 9075 #define _mm_cvt_roundi64_sd(A, B, R) \ 9076 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9079 #define _mm_cvt_roundsi64_sd(A, B, R) \ 9080 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 9084 #define _mm_cvt_roundsi32_ss(A, B, R) \ 9085 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) 9087 #define _mm_cvt_roundi32_ss(A, B, R) \ 9088 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) 9091 #define _mm_cvt_roundsi64_ss(A, B, R) \ 9092 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9095 #define _mm_cvt_roundi64_ss(A, B, R) \ 9096 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9100 #define _mm_cvt_roundss_sd(A, B, R) \ 9101 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9102 (__v4sf)(__m128)(B), \ 9103 (__v2df)_mm_undefined_pd(), \ 9104 (__mmask8)-1, (int)(R)) 9106 #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ 9107 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9108 (__v4sf)(__m128)(B), \ 9109 (__v2df)(__m128d)(W), \ 9110 (__mmask8)(U), (int)(R)) 9112 #define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ 9113 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9114 (__v4sf)(__m128)(B), \ 9115 (__v2df)_mm_setzero_pd(), \ 9116 (__mmask8)(U), (int)(R)) 9121 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9130 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9144 #define _mm_cvt_roundu64_sd(A, B, R) \ 9145 (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ 9146 (unsigned long long)(B), (int)(R)) 9149 _mm_cvtu64_sd (__m128d __A,
unsigned long long __B)
9156 #define _mm_cvt_roundu32_ss(A, B, R) \ 9157 (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ 9168 #define _mm_cvt_roundu64_ss(A, B, R) \ 9169 (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ 9170 (unsigned long long)(B), (int)(R)) 9173 _mm_cvtu64_ss (__m128 __A,
unsigned long long __B)
9183 return (__m512i) __builtin_ia32_selectd_512(__M,
9191 return (__m512i) __builtin_ia32_selectq_512(__M,
9198 char __e58,
char __e57,
char __e56,
char __e55,
char __e54,
char __e53,
9199 char __e52,
char __e51,
char __e50,
char __e49,
char __e48,
char __e47,
9200 char __e46,
char __e45,
char __e44,
char __e43,
char __e42,
char __e41,
9201 char __e40,
char __e39,
char __e38,
char __e37,
char __e36,
char __e35,
9202 char __e34,
char __e33,
char __e32,
char __e31,
char __e30,
char __e29,
9203 char __e28,
char __e27,
char __e26,
char __e25,
char __e24,
char __e23,
9204 char __e22,
char __e21,
char __e20,
char __e19,
char __e18,
char __e17,
9205 char __e16,
char __e15,
char __e14,
char __e13,
char __e12,
char __e11,
9206 char __e10,
char __e9,
char __e8,
char __e7,
char __e6,
char __e5,
9207 char __e4,
char __e3,
char __e2,
char __e1,
char __e0) {
9209 return __extension__ (__m512i)(__v64qi)
9210 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9211 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9212 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9213 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9214 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9215 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9216 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9217 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9222 short __e27,
short __e26,
short __e25,
short __e24,
short __e23,
9223 short __e22,
short __e21,
short __e20,
short __e19,
short __e18,
9224 short __e17,
short __e16,
short __e15,
short __e14,
short __e13,
9225 short __e12,
short __e11,
short __e10,
short __e9,
short __e8,
9226 short __e7,
short __e6,
short __e5,
short __e4,
short __e3,
9227 short __e2,
short __e1,
short __e0) {
9228 return __extension__ (__m512i)(__v32hi)
9229 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9230 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9231 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9232 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9237 int __E,
int __F,
int __G,
int __H,
9238 int __I,
int __J,
int __K,
int __L,
9239 int __M,
int __N,
int __O,
int __P)
9241 return __extension__ (__m512i)(__v16si)
9242 { __P, __O, __N, __M, __L, __K, __J, __I,
9243 __H, __G, __F, __E,
__D, __C, __B, __A };
9246 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 9247 e8,e9,e10,e11,e12,e13,e14,e15) \ 9248 _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ 9249 (e5),(e4),(e3),(e2),(e1),(e0)) 9253 long long __D,
long long __E,
long long __F,
9254 long long __G,
long long __H)
9256 return __extension__ (__m512i) (__v8di)
9257 { __H, __G, __F, __E,
__D, __C, __B, __A };
9260 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 9261 _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9265 double __E,
double __F,
double __G,
double __H)
9267 return __extension__ (__m512d)
9268 { __H, __G, __F, __E,
__D, __C, __B, __A };
9271 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 9272 _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9276 float __E,
float __F,
float __G,
float __H,
9277 float __I,
float __J,
float __K,
float __L,
9278 float __M,
float __N,
float __O,
float __P)
9280 return __extension__ (__m512)
9281 { __P, __O, __N, __M, __L, __K, __J, __I,
9282 __H, __G, __F, __E,
__D, __C, __B, __A };
9285 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 9286 _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ 9287 (e4),(e3),(e2),(e1),(e0)) 9323 #define _mm512_mask_reduce_operator(op) \ 9324 __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \ 9325 __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \ 9326 __m256i __t3 = (__m256i)(__t1 op __t2); \ 9327 __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \ 9328 __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \ 9329 __v2du __t6 = __t4 op __t5; \ 9330 __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9331 __v2du __t8 = __t6 op __t7; \ 9373 #undef _mm512_mask_reduce_operator 9375 #define _mm512_mask_reduce_operator(op) \ 9376 __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \ 9377 __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \ 9378 __m256d __t3 = __t1 op __t2; \ 9379 __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ 9380 __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ 9381 __m128d __t6 = __t4 op __t5; \ 9382 __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9383 __m128d __t8 = __t6 op __t7; \ 9405 #undef _mm512_mask_reduce_operator 9407 #define _mm512_mask_reduce_operator(op) \ 9408 __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \ 9409 __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \ 9410 __m256i __t3 = (__m256i)(__t1 op __t2); \ 9411 __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \ 9412 __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \ 9413 __v4su __t6 = __t4 op __t5; \ 9414 __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9415 __v4su __t8 = __t6 op __t7; \ 9416 __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9417 __v4su __t10 = __t8 op __t9; \ 9463 #undef _mm512_mask_reduce_operator 9465 #define _mm512_mask_reduce_operator(op) \ 9466 __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \ 9467 __m256 __t2 = 
(__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \ 9468 __m256 __t3 = __t1 op __t2; \ 9469 __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ 9470 __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ 9471 __m128 __t6 = __t4 op __t5; \ 9472 __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9473 __m128 __t8 = __t6 op __t7; \ 9474 __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9475 __m128 __t10 = __t8 op __t9; \ 9499 #undef _mm512_mask_reduce_operator 9501 #define _mm512_mask_reduce_operator(op) \ 9502 __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \ 9503 __m512i __t2 = _mm512_##op(__V, __t1); \ 9504 __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \ 9505 __m512i __t4 = _mm512_##op(__t2, __t3); \ 9506 __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \ 9507 __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ 9553 #undef _mm512_mask_reduce_operator 9555 #define _mm512_mask_reduce_operator(op) \ 9556 __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \ 9557 __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \ 9558 __m256i __t3 = _mm256_##op(__t1, __t2); \ 9559 __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \ 9560 __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \ 9561 __m128i __t6 = _mm_##op(__t4, __t5); \ 9562 __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \ 9563 __m128i __t8 = _mm_##op(__t6, __t7); \ 9564 __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \ 9565 __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \ 9611 #undef _mm512_mask_reduce_operator 9613 #define _mm512_mask_reduce_operator(op) \ 9614 __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \ 9615 __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \ 9616 __m256d __t3 = _mm256_##op(__t1, __t2); \ 9617 __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ 9618 __m128d 
__t5 = _mm256_extractf128_pd(__t3, 1); \ 9619 __m128d __t6 = _mm_##op(__t4, __t5); \ 9620 __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9621 __m128d __t8 = _mm_##op(__t6, __t7); \ 9645 #undef _mm512_mask_reduce_operator 9647 #define _mm512_mask_reduce_operator(op) \ 9648 __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \ 9649 __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \ 9650 __m256 __t3 = _mm256_##op(__t1, __t2); \ 9651 __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ 9652 __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ 9653 __m128 __t6 = _mm_##op(__t4, __t5); \ 9654 __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9655 __m128 __t8 = _mm_##op(__t6, __t7); \ 9656 __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9657 __m128 __t10 = _mm_##op(__t8, __t9); \ 9681 #undef _mm512_mask_reduce_operator 9683 #undef __DEFAULT_FN_ATTRS512 9684 #undef __DEFAULT_FN_ATTRS128 9685 #undef __DEFAULT_FN_ATTRS static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
struct __storeu_i16 *__P __v
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
#define __DEFAULT_FN_ATTRS512
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ void short __D
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2...
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ void const void * __src
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ void int __a
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
#define _mm512_mask_reduce_operator(op)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(__m512i *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ vector float vector float __b
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(double *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
#define __DEFAULT_FN_ATTRS128
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(float *__P, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b)