24 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 27 #ifndef __AVX512FINTRIN_H 28 #define __AVX512FINTRIN_H 34 typedef long long __v8di
__attribute__((__vector_size__(64)));
38 typedef unsigned char __v64qu
__attribute__((__vector_size__(64)));
39 typedef unsigned short __v32hu
__attribute__((__vector_size__(64)));
40 typedef unsigned long long __v8du
__attribute__((__vector_size__(64)));
41 typedef unsigned int __v16su
__attribute__((__vector_size__(64)));
45 typedef long long __m512i
__attribute__((__vector_size__(64)));
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04

#define _MM_CMPINT_GE _MM_CMPINT_NLT
#define _MM_CMPINT_GT _MM_CMPINT_NLE

/* Default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_setzero_si512(void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
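/* Illustrative usage sketch (comment added for clarity; not part of the
 * original header). The _MM_FROUND_* values select rounding for the
 * *_round_* intrinsics defined below, e.g.:
 *
 *   __m512i zero = _mm512_setzero_si512();
 *   __m512d sum  = _mm512_add_round_pd(a, b, _MM_FROUND_TO_NEG_INF);
 *
 * _MM_FROUND_CUR_DIRECTION means "use whatever rounding mode MXCSR
 * currently specifies" instead of overriding it per instruction.
 */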
#define _mm512_setzero_epi32 _mm512_setzero_si512

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}
return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
                                        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);

return (__m512i)__builtin_ia32_selectd_512(__M,

return (__m512i)__builtin_ia32_selectd_512(__M,

return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
                                        0, 0, 0, 0, 0, 0, 0, 0);

return (__m512i)__builtin_ia32_selectq_512(__M,

return (__m512i)__builtin_ia32_selectq_512(__M,
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_setzero_ps(void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

#define _mm512_setzero _mm512_setzero_ps

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_setzero_pd(void)
{
  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                               __w, __w, __w, __w, __w, __w, __w, __w };

return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };

return __extension__ (__m512i)(__v64qi){
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w };

return __extension__ (__m512i)(__v32hi){
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w,
  __w, __w, __w, __w, __w, __w, __w, __w };

return __extension__ (__m512i)(__v16si){
  __s, __s, __s, __s, __s, __s, __s, __s,
  __s, __s, __s, __s, __s, __s, __s, __s };

return (__m512i)__builtin_ia32_selectd_512(__M,

return __extension__ (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };

return (__m512i)__builtin_ia32_selectq_512(__M,
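/* Illustrative usage sketch (added comment; assumes the usual
 * _mm512_set1_* names for the truncated bodies above): these broadcast one
 * scalar into every lane, and the mask_/maskz_ forms apply a write-mask:
 *
 *   __m512i v = _mm512_set1_epi32(42);                  // 16 lanes of 42
 *   __m512i m = _mm512_mask_set1_epi32(src, 0x00FF, 7); // lanes 0..7 = 7,
 *                                                       // lanes 8..15 = src
 */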
return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
                                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return __extension__ (__m512i)(__v16si)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D)
{
  return __extension__ (__m512i) (__v8di)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps((e3),(e2),(e1),(e0))

return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
                                        0, 0, 0, 0, 0, 0, 0, 0);
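/* Note on element order (added comment): _mm512_set4_epi32(a, b, c, d)
 * stores d,c,b,a repeated from the lowest lane up, so the setr4 macros
 * above recover memory order simply by reversing their arguments:
 *
 *   __m512i x = _mm512_setr4_epi32(1, 2, 3, 4);  // lanes: 1,2,3,4,1,2,3,4,...
 */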
return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);

return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
                               -1, -1, -1, -1, -1, -1, -1, -1);

return __builtin_shufflevector(__a, __a, 0, 1);

return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);

return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);

return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);

return (__m512) (__A);

return (__m512i) (__A);

return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);

return (__m512d) (__A);

return (__m512i) (__A);

return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1,
                                -1, -1, -1, -1, -1, -1, -1, -1);

return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);

return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);

return (__m512) (__A);

return (__m512d) (__A);

return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);

return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
static __inline __mmask16 __DEFAULT_FN_ATTRS512
_mm512_int2mask(int __a)
{
  return (__mmask16)__a;
}
return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(),
                               0, 1, 2, 3, 2, 3, 2, 3);

return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(),
                               0, 1, 2, 3, 4, 5, 6, 7);

return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(),
                               0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);

return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(),
                               0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);

return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(),
                               0, 1, 2, 3, 2, 3, 2, 3);

return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(),
                               0, 1, 2, 3, 4, 5, 6, 7);
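/* Added comment, assuming the usual _mm512_cast* and _mm512_zext* names
 * for the bodies above: the casts reinterpret bits and, when widening
 * (e.g. 128 -> 512), leave the upper lanes undefined (the -1 shuffle
 * indices); the zext* variants shuffle in an explicit zero vector, so the
 * upper lanes are guaranteed to be zero:
 *
 *   __m512d a = _mm512_castpd128_pd512(x);  // upper 384 bits undefined
 *   __m512d b = _mm512_zextpd128_pd512(x);  // upper 384 bits are 0.0
 */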
637 return (__m512i)((__v16su)__a & (__v16su)
__b);
643 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
658 return (__m512i)((__v8du)__a & (__v8du)
__b);
664 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
679 return (__m512i)(~(__v8du)__A & (__v8du)__B);
685 return (__m512i)(~(__v16su)__A & (__v16su)__B);
691 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
706 return (__m512i)(~(__v8du)__A & (__v8du)__B);
712 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
727 return (__m512i)((__v16su)__a | (__v16su)
__b);
733 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
747 return (__m512i)((__v8du)__a | (__v8du)
__b);
753 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
767 return (__m512i)((__v16su)__a ^ (__v16su)
__b);
773 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
787 return (__m512i)((__v8du)__a ^ (__v8du)
__b);
793 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
807 return (__m512i)((__v8du)__a & (__v8du)
__b);
813 return (__m512i)((__v8du)__a | (__v8du)
__b);
819 return (__m512i)((__v8du)__a ^ (__v8du)
__b);
return (__m512d)((__v8df)__a + (__v8df)__b);

return (__m512)((__v16sf)__a + (__v16sf)__b);

return (__m512d)((__v8df)__a * (__v8df)__b);

return (__m512)((__v16sf)__a * (__v16sf)__b);

return (__m512d)((__v8df)__a - (__v8df)__b);

return (__m512)((__v16sf)__a - (__v16sf)__b);
return (__m512i) ((__v8du) __A + (__v8du) __B);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

return (__m512i) ((__v8du) __A - (__v8du) __B);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

return (__m512i) ((__v16su) __A + (__v16su) __B);

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,

return (__m512i) ((__v16su) __A - (__v16su) __B);

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
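/* Added comment: maskz_ variants zero the unselected lanes instead of
 * merging with a source operand. Assuming the usual name for the masked
 * add bodies above:
 *
 *   __m512i r = _mm512_maskz_add_epi32(k, a, b);
 *   // r[i] = ((k >> i) & 1) ? a[i] + b[i] : 0
 */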
#define _mm512_max_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
return (__m512d)__builtin_ia32_selectpd_512(__U,

return (__m512d)__builtin_ia32_selectpd_512(__U,
#define _mm512_max_round_ps(A, B, R) \
  (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
return (__m512)__builtin_ia32_selectps_512(__U,

return (__m512)__builtin_ia32_selectps_512(__U,

return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,

return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
#define _mm_max_round_ss(A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_max_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_max_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
#define _mm_max_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_max_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_max_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

static __inline __m512i
__DEFAULT_FN_ATTRS512
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
}
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
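/* Added comment: for max/min the (R) argument cannot change the numeric
 * result (the result is always one of the inputs); it only allows
 * suppressing exceptions (SAE). Passing _MM_FROUND_CUR_DIRECTION behaves
 * like the plain form:
 *
 *   __m512d m = _mm512_max_round_pd(a, b, _MM_FROUND_CUR_DIRECTION);
 */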
#define _mm512_min_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
return (__m512d)__builtin_ia32_selectpd_512(__U,

return (__m512d)__builtin_ia32_selectpd_512(__U,
#define _mm512_min_round_ps(A, B, R) \
  (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
return (__m512)__builtin_ia32_selectps_512(__U,

return (__m512)__builtin_ia32_selectps_512(__U,

return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,

return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
#define _mm_min_round_ss(A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_min_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_min_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
#define _mm_min_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_min_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_min_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

static __inline __m512i
__DEFAULT_FN_ATTRS512
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
}
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
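/* Added comment, assuming the usual names for the pmuldq/pmuludq bodies
 * above: these multiply the low (even-indexed) 32-bit element of each
 * 64-bit lane and produce full 64-bit products:
 *
 *   __m512i p = _mm512_mul_epi32(a, b);  // 8 signed 32x32 -> 64 products
 *   __m512i q = _mm512_mul_epu32(a, b);  // unsigned counterpart
 */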
return (__m512i) ((__v16su) __A * (__v16su) __B);

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,

return (__m512i) ((__v8du) __A * (__v8du) __B);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
#define _mm512_sqrt_round_pd(A, R) \
  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))

#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd())

return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
return (__m512d)__builtin_ia32_selectpd_512(__U,

return (__m512d)__builtin_ia32_selectpd_512(__U,
#define _mm512_sqrt_round_ps(A, R) \
  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))

#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
return (__m512)__builtin_ia32_selectps_512(__U,

return (__m512)__builtin_ia32_selectps_512(__U,

return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,

return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,

return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,

return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,

return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,

return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,

return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,

return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,

return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,

return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,

return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,

return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,

return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,

return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,

return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,

return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
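/* Added comment: rcp14/rsqrt14 return approximations with relative error
 * bounded by 2^-14. When more precision is needed, one Newton-Raphson
 * step refines the estimate; a sketch assuming the usual intrinsic names
 * for the bodies in this file:
 *
 *   __m512 r = _mm512_rcp14_ps(a);                        // ~14-bit 1/a
 *   r = _mm512_mul_ps(r, _mm512_sub_ps(_mm512_set1_ps(2.0f),
 *                                      _mm512_mul_ps(a, r)));  // refine
 */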
return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,

return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,

return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,

return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,

return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,

return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);

return (__m512i)__builtin_ia32_selectd_512(__U,

return (__m512i)__builtin_ia32_selectd_512(__U,

return __builtin_ia32_selectss_128(__U, __A, __W);
#define _mm_add_round_ss(A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

return __builtin_ia32_selectsd_128(__U, __A, __W);
#define _mm_add_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
#define _mm512_add_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_add_round_ps(A, B, R) \
  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

return __builtin_ia32_selectss_128(__U, __A, __W);
#define _mm_sub_round_ss(A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

return __builtin_ia32_selectsd_128(__U, __A, __W);
#define _mm_sub_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
#define _mm512_sub_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_sub_round_ps(A, B, R) \
  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

return __builtin_ia32_selectss_128(__U, __A, __W);
#define _mm_mul_round_ss(A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

return __builtin_ia32_selectsd_128(__U, __A, __W);
#define _mm_mul_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
#define _mm512_mul_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_mul_round_ps(A, B, R) \
  (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

return __builtin_ia32_selectss_128(__U, __A, __W);
#define _mm_div_round_ss(A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

return __builtin_ia32_selectsd_128(__U, __A, __W);
#define _mm_div_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

return (__m512d)((__v8df)__a / (__v8df)__b);
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,

return (__m512)((__v16sf)__a / (__v16sf)__b);

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
#define _mm512_div_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_div_round_ps(A, B, R) \
  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())

#define _mm512_roundscale_ps(A, B) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R))

#define _mm512_roundscale_round_ps(A, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R))

#define _mm512_roundscale_pd(A, B) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R))

#define _mm512_roundscale_round_pd(A, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
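/* Added comment: the three masked FMA forms differ only in where the
 * unselected lanes come from: mask_ keeps the first operand (A), mask3_
 * keeps the addend (C), and maskz_ zeroes them. For example:
 *
 *   __m512 r = _mm512_mask_fmadd_ps(a, k, b, c);
 *   // r[i] = ((k >> i) & 1) ? a[i]*b[i] + c[i] : a[i]
 */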
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R))

#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))

#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R))

#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R))

#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))

return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))

return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,

#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))

return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,

#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))

return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           -(__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))

return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,

#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,

#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           -(__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))

return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,

return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,

#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))

return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,

return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,

return (__m512i)__builtin_ia32_selectd_512(__U,

return (__m512i)__builtin_ia32_selectd_512(__U,

return (__m512i)__builtin_ia32_selectd_512(__U,

return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,

return (__m512i)__builtin_ia32_selectq_512(__U,

return (__m512i)__builtin_ia32_selectq_512(__U,

return (__m512i)__builtin_ia32_selectq_512(__U,
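/* Added comment, assuming the usual _mm512_permutex2var_epi32 name for the
 * vpermi2var bodies above: each index element selects from the
 * concatenation of both sources; for 16-lane epi32, bits 3:0 pick the
 * element and bit 4 picks the source:
 *
 *   __m512i r = _mm512_permutex2var_epi32(a, idx, b);
 *   // r[i] = (idx[i] & 16) ? b[idx[i] & 15] : a[idx[i] & 15]
 */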
#define _mm512_alignr_epi64(A, B, I) \
  (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                    (__v8di)(__m512i)(B), (int)(I))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512())

#define _mm512_alignr_epi32(A, B, I) \
  (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                    (__v16si)(__m512i)(B), (int)(I))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_extractf64x4_pd(A, I) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                            (__v4df)_mm256_undefined_pd(), \
                                            (__mmask8)-1)

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U))

#define _mm512_extractf32x4_ps(A, I) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                           (__v4sf)_mm_undefined_ps(), \
                                           (__mmask8)-1)

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U))

return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,

return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,

return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)

#define _mm512_cvtt_roundps_epu32(A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_undefined_epi32(), \
                                             (__mmask16)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R))

return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,

return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
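/* Added comment: the compare macros above produce a bitmask (__mmask16 or
 * __mmask8) rather than a vector, and that mask can drive any mask_ or
 * maskz_ intrinsic (usual names assumed):
 *
 *   __mmask16 k = _mm512_cmplt_ps_mask(a, b);  // bit i set iff a[i] < b[i]
 *   __m512    r = _mm512_mask_add_ps(a, k, a, b);
 */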
#define _mm512_cvt_roundepi32_ps(A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_cvt_roundepu32_ps(A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))

return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);

return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,

return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,

return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,

return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);

return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,

return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
#define _mm512_cvt_roundpd_ps(A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_ps (__m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_undefined_ps (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_pslo (__m512d __A)
{
  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U, __m512d __A)
{
  return (__m512) __builtin_shufflevector (
                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                               __U, __A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
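/* Illustrative note (not part of the original header): the "pslo" forms
   convert the eight doubles to floats in the low 256 bits of a 512-bit
   register and zero the upper eight float lanes, e.g.

     __m512d d  = _mm512_set1_pd(1.5);
     __m512  lo = _mm512_cvtpd_pslo(d);  // lanes 0-7 = 1.5f, lanes 8-15 = 0.0f
*/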
#define _mm512_cvt_roundps_ph(A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1)

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W))

#define _mm512_cvtps_ph(A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)-1)

#define _mm512_mask_cvtps_ph(U, W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W))

#define _mm512_maskz_cvtps_ph(W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W))

#define _mm512_cvt_roundph_ps(A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtph_ps(__m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                (__v16sf) __W,
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
                (__v16sf) _mm512_setzero_ps (),
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvtt_roundpd_epi32(A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))

static __inline __m256i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi32(__m512d __a)
{
  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
                (__v8si)_mm256_setzero_si256(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                (__v8si) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
                (__v8si) _mm256_setzero_si256 (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epi32(A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi32(__m512 __a)
{
  return (__m512i)
    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
                                     (__v16si) _mm512_setzero_si512 (),
                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                (__v16si) __W,
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                (__v16si) _mm512_setzero_si512 (),
                (__mmask16) __U,
                _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvt_roundps_epi32(A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)(__m512i)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                           (__v16si)_mm512_setzero_si512(), \
                                           (__mmask16)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) _mm512_setzero_si512 (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epi32(A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)(__m256i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                           (__v8si)_mm256_setzero_si256(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si) _mm256_undefined_si256 (),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si) _mm256_setzero_si256 (),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
#define _mm512_cvt_roundps_epu32(A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si) _mm512_undefined_epi32 (),
                  (__mmask16) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si) __W,
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                  (__v16si) _mm512_setzero_si512 (),
                  (__mmask16) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epu32(A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si) _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si) _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
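/* Illustrative note (not part of the original header): cvt honors the
   rounding argument (or the current MXCSR mode), while cvtt always
   truncates toward zero:

     __m512d d = _mm512_set1_pd(2.7);
     // _mm512_cvt_roundpd_epi32(d, _MM_FROUND_TO_NEAREST_INT |
     //                             _MM_FROUND_NO_EXC)   -> every lane 3
     // _mm512_cvttpd_epi32(d)                           -> every lane 2
*/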
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_unpackhi_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                          (__v8df)_mm512_unpackhi_pd(__A, __B),
                                          (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                          (__v8df)_mm512_unpackhi_pd(__A, __B),
                                          (__v8df)_mm512_setzero_pd());
}

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_unpacklo_pd(__m512d __a, __m512d __b)
{
  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                          (__v8df)_mm512_unpacklo_pd(__A, __B),
                                          (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
                                          (__v8df)_mm512_unpacklo_pd(__A, __B),
                                          (__v8df)_mm512_setzero_pd());
}
4172 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4174 2+4, 18+4, 3+4, 19+4,
4175 2+8, 18+8, 3+8, 19+8,
4176 2+12, 18+12, 3+12, 19+12);
4182 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4190 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4198 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4200 0+4, 16+4, 1+4, 17+4,
4201 0+8, 16+8, 1+8, 17+8,
4202 0+12, 16+12, 1+12, 17+12);
4208 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4216 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4224 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4226 2+4, 18+4, 3+4, 19+4,
4227 2+8, 18+8, 3+8, 19+8,
4228 2+12, 18+12, 3+12, 19+12);
4234 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4242 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4250 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4252 0+4, 16+4, 1+4, 17+4,
4253 0+8, 16+8, 1+8, 17+8,
4254 0+12, 16+12, 1+12, 17+12);
4260 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4268 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4276 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4277 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4283 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4291 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4299 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4300 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4306 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4314 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
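/* Illustrative note (not part of the original header): the 512-bit unpack
   intrinsics interleave within each 128-bit lane, not across the whole
   register, which the index expressions above (e.g. 1+4, 9+4) spell out:

     __m512i a  = _mm512_set1_epi64(1), b = _mm512_set1_epi64(2);
     __m512i lo = _mm512_unpacklo_epi64(a, b);  // qwords 1,2,1,2,... per lane
*/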
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_si512 (void const *__P)
{
  struct __loadu_si512 {
    __m512i __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct __loadu_si512*)__P)->__v;
}
4334 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *) __P,
4343 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)__P,
4352 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *) __P,
4360 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)__P,
4369 return (__m512) __builtin_ia32_loadups512_mask ((
const float *) __P,
4377 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)__P,
4386 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *) __P,
4394 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)__P,
4406 return ((
struct __loadu_pd*)__p)->__v;
4415 return ((
struct __loadu_ps*)__p)->__v;
4421 return *(__m512*)__p;
4427 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *) __P,
4435 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)__P,
4444 return *(__m512d*)__p;
4450 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *) __P,
4458 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)__P,
4467 return *(__m512i *) __P;
4473 return *(__m512i *) __P;
4479 return *(__m512i *) __P;
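/* Illustrative note (not part of the original header): the loadu forms go
   through a packed, may_alias struct so the compiler emits an unaligned
   load, whereas the aligned load forms dereference a vector pointer
   directly and therefore require 64-byte alignment:

     int buf[16] __attribute__((aligned(64)));
     __m512i a = _mm512_load_si512(buf);    // requires 64-byte alignment
     __m512i u = _mm512_loadu_si512(buf);   // no alignment requirement
*/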
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}
4503 __builtin_ia32_storedqusi512_mask ((
int *)__P, (__v16si) __A,
4510 __builtin_ia32_storeupd512_mask ((
double *)__P, (__v8df) __A, (__mmask8) __U);
4516 struct __storeu_pd {
4519 ((
struct __storeu_pd*)__P)->__v = __A;
4525 __builtin_ia32_storeups512_mask ((
float *)__P, (__v16sf) __A,
4532 struct __storeu_ps {
4535 ((
struct __storeu_ps*)__P)->__v = __A;
4541 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4547 *(__m512d*)__P = __A;
4553 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4560 *(__m512*)__P = __A;
4566 *(__m512i *) __P = __A;
4572 *(__m512i *) __P = __A;
4578 *(__m512i *) __P = __A;
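/* Illustrative usage (not part of the original header): masked stores only
   touch the elements selected by the mask, which makes them useful for
   writing a partial final vector without overrunning a buffer:

     double out[5];
     __m512d v = _mm512_set1_pd(3.0);
     _mm512_mask_storeu_pd(out, (__mmask8)0x1F, v);  // writes out[0..4] only
*/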
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
  return __builtin_ia32_knothi(__M);
}
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi8_epi32(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
}
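/* Illustrative usage (not part of the original header): the comparison
   macros above reduce a full-width compare to a bit mask, which can then
   drive masked operations or be tested directly:

     __m512i a = _mm512_set1_epi32(1), b = _mm512_set1_epi32(2);
     __mmask16 k = _mm512_cmplt_epi32_mask(a, b);   // k == 0xFFFF
     int all_lt = (k == (__mmask16)0xFFFF);
*/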
4702 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4710 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4720 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4726 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4734 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4742 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4748 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4756 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4764 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4770 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4778 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4786 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4792 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4800 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4808 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4814 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4822 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4830 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4836 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4844 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4852 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4858 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4866 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4874 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4880 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4888 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4896 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4902 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4910 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4918 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4924 return (__m512i)__builtin_ia32_selectd_512(__U,
4932 return (__m512i)__builtin_ia32_selectd_512(__U,
4940 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4946 return (__m512i)__builtin_ia32_selectq_512(__U,
4954 return (__m512i)__builtin_ia32_selectq_512(__U,
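/* Illustrative usage (not part of the original header): rolv/rorv rotate
   each lane by a per-lane count, so one call can apply a different
   rotation to every element:

     __m512i v = _mm512_set1_epi32(0x80000001);
     __m512i c = _mm512_set_epi32(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
     __m512i r = _mm512_rolv_epi32(v, c);  // lane i rotated left by c[i]
*/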
#define _mm512_cmp_epi32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1)

#define _mm512_cmp_epu32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1)

#define _mm512_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1)

#define _mm512_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1)

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m))

#define _mm512_rol_epi32(a, b) \
  (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_rol_epi32(U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_rol_epi64(a, b) \
  (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_rol_epi64(U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)_mm512_setzero_si512())

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
}
5036 return (__m512i)__builtin_ia32_selectd_512(__U,
5044 return (__m512i)__builtin_ia32_selectd_512(__U,
5052 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5058 return (__m512i)__builtin_ia32_selectq_512(__U,
5066 return (__m512i)__builtin_ia32_selectq_512(__U,
5071 #define _mm512_ror_epi32(A, B) \ 5072 (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)) 5074 #define _mm512_mask_ror_epi32(W, U, A, B) \ 5075 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5076 (__v16si)_mm512_ror_epi32((A), (B)), \ 5077 (__v16si)(__m512i)(W)) 5079 #define _mm512_maskz_ror_epi32(U, A, B) \ 5080 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 5081 (__v16si)_mm512_ror_epi32((A), (B)), \ 5082 (__v16si)_mm512_setzero_si512()) 5084 #define _mm512_ror_epi64(A, B) \ 5085 (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)) 5087 #define _mm512_mask_ror_epi64(W, U, A, B) \ 5088 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5089 (__v8di)_mm512_ror_epi64((A), (B)), \ 5090 (__v8di)(__m512i)(W)) 5092 #define _mm512_maskz_ror_epi64(U, A, B) \ 5093 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 5094 (__v8di)_mm512_ror_epi64((A), (B)), \ 5095 (__v8di)_mm512_setzero_si512()) 5100 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5106 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5113 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5121 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5127 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5135 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5143 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5149 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5156 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5164 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5170 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5178 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
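/* Illustrative usage (not part of the original header): the immediate-count
   shifts take a single count that applies to every lane:

     __m512i v = _mm512_set1_epi32(4);
     __m512i l = _mm512_slli_epi32(v, 2);   // every lane becomes 16
     __m512i r = _mm512_srli_epi32(v, 1);   // every lane becomes 2
*/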
5186 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5194 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5203 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5210 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5218 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5226 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5234 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5242 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5250 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5259 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5266 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5267 0, 0, 2, 2, 4, 4, 6, 6);
5273 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5281 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5286 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) \ 5287 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5288 (__v8df)(__m512d)(B), \ 5289 (__v8di)(__m512i)(C), (int)(imm), \ 5290 (__mmask8)-1, (int)(R)) 5292 #define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ 5293 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5294 (__v8df)(__m512d)(B), \ 5295 (__v8di)(__m512i)(C), (int)(imm), \ 5296 (__mmask8)(U), (int)(R)) 5298 #define _mm512_fixupimm_pd(A, B, C, imm) \ 5299 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5300 (__v8df)(__m512d)(B), \ 5301 (__v8di)(__m512i)(C), (int)(imm), \ 5303 _MM_FROUND_CUR_DIRECTION) 5305 #define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \ 5306 (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \ 5307 (__v8df)(__m512d)(B), \ 5308 (__v8di)(__m512i)(C), (int)(imm), \ 5310 _MM_FROUND_CUR_DIRECTION) 5312 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ 5313 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5314 (__v8df)(__m512d)(B), \ 5315 (__v8di)(__m512i)(C), \ 5316 (int)(imm), (__mmask8)(U), \ 5319 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \ 5320 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \ 5321 (__v8df)(__m512d)(B), \ 5322 (__v8di)(__m512i)(C), \ 5323 (int)(imm), (__mmask8)(U), \ 5324 _MM_FROUND_CUR_DIRECTION) 5326 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) \ 5327 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5328 (__v16sf)(__m512)(B), \ 5329 (__v16si)(__m512i)(C), (int)(imm), \ 5330 (__mmask16)-1, (int)(R)) 5332 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ 5333 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5334 (__v16sf)(__m512)(B), \ 5335 (__v16si)(__m512i)(C), (int)(imm), \ 5336 (__mmask16)(U), (int)(R)) 5338 #define _mm512_fixupimm_ps(A, B, C, imm) \ 5339 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5340 (__v16sf)(__m512)(B), \ 5341 (__v16si)(__m512i)(C), (int)(imm), \ 5343 _MM_FROUND_CUR_DIRECTION) 5345 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \ 5346 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \ 5347 (__v16sf)(__m512)(B), \ 5348 (__v16si)(__m512i)(C), (int)(imm), \ 5350 _MM_FROUND_CUR_DIRECTION) 5352 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ 5353 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5354 (__v16sf)(__m512)(B), \ 5355 (__v16si)(__m512i)(C), \ 5356 (int)(imm), (__mmask16)(U), \ 5359 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \ 5360 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \ 5361 (__v16sf)(__m512)(B), \ 5362 (__v16si)(__m512i)(C), \ 5363 (int)(imm), (__mmask16)(U), \ 5364 _MM_FROUND_CUR_DIRECTION) 5366 #define _mm_fixupimm_round_sd(A, B, C, imm, R) \ 5367 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5368 (__v2df)(__m128d)(B), \ 5369 (__v2di)(__m128i)(C), (int)(imm), \ 5370 (__mmask8)-1, (int)(R)) 5372 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \ 5373 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5374 (__v2df)(__m128d)(B), \ 5375 (__v2di)(__m128i)(C), (int)(imm), \ 5376 (__mmask8)(U), (int)(R)) 5378 #define _mm_fixupimm_sd(A, B, C, imm) \ 5379 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5380 (__v2df)(__m128d)(B), \ 5381 (__v2di)(__m128i)(C), (int)(imm), \ 5383 _MM_FROUND_CUR_DIRECTION) 5385 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) \ 5386 
(__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \ 5387 (__v2df)(__m128d)(B), \ 5388 (__v2di)(__m128i)(C), (int)(imm), \ 5390 _MM_FROUND_CUR_DIRECTION) 5392 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \ 5393 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5394 (__v2df)(__m128d)(B), \ 5395 (__v2di)(__m128i)(C), (int)(imm), \ 5396 (__mmask8)(U), (int)(R)) 5398 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \ 5399 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \ 5400 (__v2df)(__m128d)(B), \ 5401 (__v2di)(__m128i)(C), (int)(imm), \ 5403 _MM_FROUND_CUR_DIRECTION) 5405 #define _mm_fixupimm_round_ss(A, B, C, imm, R) \ 5406 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5407 (__v4sf)(__m128)(B), \ 5408 (__v4si)(__m128i)(C), (int)(imm), \ 5409 (__mmask8)-1, (int)(R)) 5411 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \ 5412 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5413 (__v4sf)(__m128)(B), \ 5414 (__v4si)(__m128i)(C), (int)(imm), \ 5415 (__mmask8)(U), (int)(R)) 5417 #define _mm_fixupimm_ss(A, B, C, imm) \ 5418 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5419 (__v4sf)(__m128)(B), \ 5420 (__v4si)(__m128i)(C), (int)(imm), \ 5422 _MM_FROUND_CUR_DIRECTION) 5424 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) \ 5425 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \ 5426 (__v4sf)(__m128)(B), \ 5427 (__v4si)(__m128i)(C), (int)(imm), \ 5429 _MM_FROUND_CUR_DIRECTION) 5431 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \ 5432 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5433 (__v4sf)(__m128)(B), \ 5434 (__v4si)(__m128i)(C), (int)(imm), \ 5435 (__mmask8)(U), (int)(R)) 5437 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \ 5438 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \ 5439 (__v4sf)(__m128)(B), \ 5440 (__v4si)(__m128i)(C), (int)(imm), \ 5442 _MM_FROUND_CUR_DIRECTION) 5444 #define _mm_getexp_round_sd(A, B, R) \ 5445 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5446 (__v2df)(__m128d)(B), \ 5447 (__v2df)_mm_setzero_pd(), \ 5448 (__mmask8)-1, (int)(R)) 5454 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5461 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5468 #define _mm_mask_getexp_round_sd(W, U, A, B, R) \ 5469 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5470 (__v2df)(__m128d)(B), \ 5471 (__v2df)(__m128d)(W), \ 5472 (__mmask8)(U), (int)(R)) 5477 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5484 #define _mm_maskz_getexp_round_sd(U, A, B, R) \ 5485 (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \ 5486 (__v2df)(__m128d)(B), \ 5487 (__v2df)_mm_setzero_pd(), \ 5488 (__mmask8)(U), (int)(R)) 5490 #define _mm_getexp_round_ss(A, B, R) \ 5491 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5492 (__v4sf)(__m128)(B), \ 5493 (__v4sf)_mm_setzero_ps(), \ 5494 (__mmask8)-1, (int)(R)) 5499 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5506 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5513 #define _mm_mask_getexp_round_ss(W, U, A, B, R) \ 5514 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5515 (__v4sf)(__m128)(B), \ 5516 (__v4sf)(__m128)(W), \ 5517 (__mmask8)(U), (int)(R)) 5522 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5529 #define _mm_maskz_getexp_round_ss(U, A, B, R) \ 5530 (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \ 5531 (__v4sf)(__m128)(B), \ 5532 (__v4sf)_mm_setzero_ps(), \ 5533 (__mmask8)(U), (int)(R)) 5535 #define _mm_getmant_round_sd(A, B, C, D, R) \ 5536 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5537 (__v2df)(__m128d)(B), \ 5538 (int)(((D)<<2) | (C)), \ 5539 (__v2df)_mm_setzero_pd(), \ 5540 (__mmask8)-1, (int)(R)) 5542 #define _mm_getmant_sd(A, B, C, D) \ 5543 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5544 (__v2df)(__m128d)(B), \ 5545 (int)(((D)<<2) | (C)), \ 5546 (__v2df)_mm_setzero_pd(), \ 5548 _MM_FROUND_CUR_DIRECTION) 5550 #define _mm_mask_getmant_sd(W, U, A, B, C, D) \ 5551 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5552 (__v2df)(__m128d)(B), \ 5553 (int)(((D)<<2) | (C)), \ 5554 (__v2df)(__m128d)(W), \ 5556 _MM_FROUND_CUR_DIRECTION) 5558 #define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \ 5559 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5560 (__v2df)(__m128d)(B), \ 5561 (int)(((D)<<2) | (C)), \ 5562 (__v2df)(__m128d)(W), \ 5563 (__mmask8)(U), (int)(R)) 5565 #define _mm_maskz_getmant_sd(U, A, B, C, D) \ 5566 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5567 (__v2df)(__m128d)(B), \ 5568 (int)(((D)<<2) | (C)), \ 5569 (__v2df)_mm_setzero_pd(), \ 5571 _MM_FROUND_CUR_DIRECTION) 5573 #define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \ 5574 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \ 5575 (__v2df)(__m128d)(B), \ 5576 (int)(((D)<<2) | (C)), \ 5577 (__v2df)_mm_setzero_pd(), \ 5578 (__mmask8)(U), (int)(R)) 5580 #define _mm_getmant_round_ss(A, B, C, D, R) \ 5581 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5582 (__v4sf)(__m128)(B), \ 5583 (int)(((D)<<2) | (C)), \ 5584 (__v4sf)_mm_setzero_ps(), \ 5585 (__mmask8)-1, (int)(R)) 5587 #define _mm_getmant_ss(A, B, C, D) \ 5588 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5589 (__v4sf)(__m128)(B), \ 5590 (int)(((D)<<2) | (C)), \ 5591 (__v4sf)_mm_setzero_ps(), \ 5593 _MM_FROUND_CUR_DIRECTION) 5595 #define _mm_mask_getmant_ss(W, U, A, B, C, D) \ 5596 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5597 (__v4sf)(__m128)(B), \ 5598 (int)(((D)<<2) | (C)), \ 5599 (__v4sf)(__m128)(W), \ 5601 _MM_FROUND_CUR_DIRECTION) 5603 #define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \ 5604 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5605 (__v4sf)(__m128)(B), \ 5606 (int)(((D)<<2) | (C)), \ 5607 (__v4sf)(__m128)(W), \ 5608 (__mmask8)(U), (int)(R)) 5610 #define _mm_maskz_getmant_ss(U, A, B, C, D) \ 5611 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5612 (__v4sf)(__m128)(B), \ 5613 (int)(((D)<<2) | (C)), \ 5614 (__v4sf)_mm_setzero_ps(), \ 5616 _MM_FROUND_CUR_DIRECTION) 5618 #define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \ 5619 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \ 5620 (__v4sf)(__m128)(B), \ 5621 (int)(((D)<<2) | (C)), \ 5622 (__v4sf)_mm_setzero_ps(), \ 5623 (__mmask8)(U), (int)(R)) 5631 #define _mm_comi_round_sd(A, B, P, R) \ 5632 (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \ 5635 #define _mm_comi_round_ss(A, B, P, R) \ 5636 (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 5640 #define _mm_cvt_roundsd_si64(A, R) \ 5641 (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) 5647 return 
(__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5653 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5661 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5669 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5675 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5683 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5691 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5697 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5705 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5713 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5719 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5727 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5735 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5741 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5749 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5757 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5763 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5771 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5779 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5785 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5793 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5801 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5807 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5815 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5823 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5829 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5837 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5845 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5851 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5859 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5867 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5873 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5881 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5889 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5895 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5903 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
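/* Illustrative usage (not part of the original header): the sllv/srlv/srav
   forms shift each lane by its own count, e.g. building per-lane powers of
   two in one instruction:

     __m512i one = _mm512_set1_epi32(1);
     __m512i n   = _mm512_set_epi32(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
     __m512i pow = _mm512_sllv_epi32(one, n);  // lane i == 1 << i
*/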
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1)

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1)

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U))

#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
#endif

#define _mm_cvt_roundsd_si32(A, R) \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvt_roundsd_i32(A, R) \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvt_roundsd_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))

static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvtsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}
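/* Illustrative usage (not part of the original header): the immediate of
   _mm512_ternarylogic_epi32 above is a 3-input truth table; 0xE8, for
   example, computes the bitwise majority of A, B and C:

     __m512i maj = _mm512_ternarylogic_epi32(a, b, c, 0xE8);
   (a, b and c are assumed to be __m512i values in scope.)
*/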
5966 #define _mm_cvt_roundsd_u64(A, R) \ 5967 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ 5971 _mm_cvtsd_u64 (__m128d __A)
5973 return (
unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5979 #define _mm_cvt_roundss_si32(A, R) \ 5980 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) 5982 #define _mm_cvt_roundss_i32(A, R) \ 5983 (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)) 5986 #define _mm_cvt_roundss_si64(A, R) \ 5987 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) 5989 #define _mm_cvt_roundss_i64(A, R) \ 5990 (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)) 5993 #define _mm_cvt_roundss_u32(A, R) \ 5994 (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)) 5999 return (
unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6004 #define _mm_cvt_roundss_u64(A, R) \ 6005 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ 6009 _mm_cvtss_u64 (__m128 __A)
6011 return (
unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6017 #define _mm_cvtt_roundsd_i32(A, R) \ 6018 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) 6020 #define _mm_cvtt_roundsd_si32(A, R) \ 6021 (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) 6026 return (
int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6031 #define _mm_cvtt_roundsd_si64(A, R) \ 6032 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) 6034 #define _mm_cvtt_roundsd_i64(A, R) \ 6035 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) 6038 _mm_cvttsd_i64 (__m128d __A)
6040 return (
long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6045 #define _mm_cvtt_roundsd_u32(A, R) \ 6046 (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)) 6051 return (
unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6056 #define _mm_cvtt_roundsd_u64(A, R) \ 6057 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ 6061 _mm_cvttsd_u64 (__m128d __A)
6063 return (
unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6069 #define _mm_cvtt_roundss_i32(A, R) \ 6070 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) 6072 #define _mm_cvtt_roundss_si32(A, R) \ 6073 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) 6078 return (
int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6083 #define _mm_cvtt_roundss_i64(A, R) \ 6084 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) 6086 #define _mm_cvtt_roundss_si64(A, R) \ 6087 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) 6090 _mm_cvttss_i64 (__m128 __A)
6092 return (
long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6097 #define _mm_cvtt_roundss_u32(A, R) \ 6098 (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)) 6103 return (
unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6108 #define _mm_cvtt_roundss_u64(A, R) \ 6109 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ 6113 _mm_cvttss_u64 (__m128 __A)
6115 return (
unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6121 #define _mm512_permute_pd(X, C) \ 6122 (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)) 6124 #define _mm512_mask_permute_pd(W, U, X, C) \ 6125 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6126 (__v8df)_mm512_permute_pd((X), (C)), \ 6127 (__v8df)(__m512d)(W)) 6129 #define _mm512_maskz_permute_pd(U, X, C) \ 6130 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6131 (__v8df)_mm512_permute_pd((X), (C)), \ 6132 (__v8df)_mm512_setzero_pd()) 6134 #define _mm512_permute_ps(X, C) \ 6135 (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)) 6137 #define _mm512_mask_permute_ps(W, U, X, C) \ 6138 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6139 (__v16sf)_mm512_permute_ps((X), (C)), \ 6140 (__v16sf)(__m512)(W)) 6142 #define _mm512_maskz_permute_ps(U, X, C) \ 6143 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6144 (__v16sf)_mm512_permute_ps((X), (C)), \ 6145 (__v16sf)_mm512_setzero_ps()) 6150 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6156 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6164 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6172 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6178 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6186 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6194 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6201 return (__m512d)__builtin_ia32_selectpd_512(__U,
6210 return (__m512d)__builtin_ia32_selectpd_512(__U,
6212 (__v8df)(__m512d)__I);
6219 return (__m512d)__builtin_ia32_selectpd_512(__U,
6227 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6234 return (__m512)__builtin_ia32_selectps_512(__U,
6242 return (__m512)__builtin_ia32_selectps_512(__U,
6244 (__v16sf)(__m512)__I);
6250 return (__m512)__builtin_ia32_selectps_512(__U,
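/* Illustrative usage (not part of the original header): the permutex2var
   intrinsics select each destination lane from the concatenation of two
   source vectors; for the ps form, bits 0-3 of each index pick the lane
   and bit 4 picks the source:

     __m512i idx = _mm512_set1_epi32(16);             // lane 0 of second source
     __m512  r   = _mm512_permutex2var_ps(a, idx, b); // broadcasts b[0]
   (a and b are assumed to be __m512 values in scope.)
*/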
6256 #define _mm512_cvtt_roundpd_epu32(A, R) \ 6257 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6258 (__v8si)_mm256_undefined_si256(), \ 6259 (__mmask8)-1, (int)(R)) 6261 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \ 6262 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6263 (__v8si)(__m256i)(W), \ 6264 (__mmask8)(U), (int)(R)) 6266 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \ 6267 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \ 6268 (__v8si)_mm256_setzero_si256(), \ 6269 (__mmask8)(U), (int)(R)) 6274 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6284 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6293 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6300 #define _mm_roundscale_round_sd(A, B, imm, R) \ 6301 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6302 (__v2df)(__m128d)(B), \ 6303 (__v2df)_mm_setzero_pd(), \ 6304 (__mmask8)-1, (int)(imm), \ 6307 #define _mm_roundscale_sd(A, B, imm) \ 6308 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6309 (__v2df)(__m128d)(B), \ 6310 (__v2df)_mm_setzero_pd(), \ 6311 (__mmask8)-1, (int)(imm), \ 6312 _MM_FROUND_CUR_DIRECTION) 6314 #define _mm_mask_roundscale_sd(W, U, A, B, imm) \ 6315 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6316 (__v2df)(__m128d)(B), \ 6317 (__v2df)(__m128d)(W), \ 6318 (__mmask8)(U), (int)(imm), \ 6319 _MM_FROUND_CUR_DIRECTION) 6321 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \ 6322 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6323 (__v2df)(__m128d)(B), \ 6324 (__v2df)(__m128d)(W), \ 6325 (__mmask8)(U), (int)(I), \ 6328 #define _mm_maskz_roundscale_sd(U, A, B, I) \ 6329 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6330 (__v2df)(__m128d)(B), \ 6331 (__v2df)_mm_setzero_pd(), \ 6332 (__mmask8)(U), (int)(I), \ 6333 _MM_FROUND_CUR_DIRECTION) 6335 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \ 6336 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \ 6337 (__v2df)(__m128d)(B), \ 6338 (__v2df)_mm_setzero_pd(), \ 6339 (__mmask8)(U), (int)(I), \ 6342 #define _mm_roundscale_round_ss(A, B, imm, R) \ 6343 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6344 (__v4sf)(__m128)(B), \ 6345 (__v4sf)_mm_setzero_ps(), \ 6346 (__mmask8)-1, (int)(imm), \ 6349 #define _mm_roundscale_ss(A, B, imm) \ 6350 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6351 (__v4sf)(__m128)(B), \ 6352 (__v4sf)_mm_setzero_ps(), \ 6353 (__mmask8)-1, (int)(imm), \ 6354 _MM_FROUND_CUR_DIRECTION) 6356 #define _mm_mask_roundscale_ss(W, U, A, B, I) \ 6357 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6358 (__v4sf)(__m128)(B), \ 6359 (__v4sf)(__m128)(W), \ 6360 (__mmask8)(U), (int)(I), \ 6361 _MM_FROUND_CUR_DIRECTION) 6363 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \ 6364 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6365 (__v4sf)(__m128)(B), \ 6366 (__v4sf)(__m128)(W), \ 6367 (__mmask8)(U), (int)(I), \ 6370 #define _mm_maskz_roundscale_ss(U, A, B, I) \ 6371 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6372 (__v4sf)(__m128)(B), \ 6373 (__v4sf)_mm_setzero_ps(), \ 6374 (__mmask8)(U), (int)(I), \ 6375 _MM_FROUND_CUR_DIRECTION) 6377 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \ 6378 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \ 6379 (__v4sf)(__m128)(B), \ 6380 (__v4sf)_mm_setzero_ps(), \ 6381 (__mmask8)(U), (int)(I), \ 6384 #define _mm512_scalef_round_pd(A, B, R) \ 6385 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6386 (__v8df)(__m512d)(B), \ 6387 (__v8df)_mm512_undefined_pd(), \ 6388 (__mmask8)-1, (int)(R)) 6390 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) \ 6391 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6392 (__v8df)(__m512d)(B), \ 6393 (__v8df)(__m512d)(W), \ 6394 (__mmask8)(U), (int)(R)) 6396 #define _mm512_maskz_scalef_round_pd(U, A, B, R) \ 6397 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \ 6398 (__v8df)(__m512d)(B), \ 6399 (__v8df)_mm512_setzero_pd(), \ 6400 (__mmask8)(U), (int)(R)) 6405 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6416 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6426 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6434 #define _mm512_scalef_round_ps(A, B, R) \ 6435 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6436 (__v16sf)(__m512)(B), \ 6437 (__v16sf)_mm512_undefined_ps(), \ 6438 (__mmask16)-1, (int)(R)) 6440 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) \ 6441 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6442 (__v16sf)(__m512)(B), \ 6443 (__v16sf)(__m512)(W), \ 6444 (__mmask16)(U), (int)(R)) 6446 #define _mm512_maskz_scalef_round_ps(U, A, B, R) \ 6447 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \ 6448 (__v16sf)(__m512)(B), \ 6449 (__v16sf)_mm512_setzero_ps(), \ 6450 (__mmask16)(U), (int)(R)) 6455 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6466 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6476 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6484 #define _mm_scalef_round_sd(A, B, R) \ 6485 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6486 (__v2df)(__m128d)(B), \ 6487 (__v2df)_mm_setzero_pd(), \ 6488 (__mmask8)-1, (int)(R)) 6493 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6502 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6509 #define _mm_mask_scalef_round_sd(W, U, A, B, R) \ 6510 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6511 (__v2df)(__m128d)(B), \ 6512 (__v2df)(__m128d)(W), \ 6513 (__mmask8)(U), (int)(R)) 6518 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6525 #define _mm_maskz_scalef_round_sd(U, A, B, R) \ 6526 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \ 6527 (__v2df)(__m128d)(B), \ 6528 (__v2df)_mm_setzero_pd(), \ 6529 (__mmask8)(U), (int)(R)) 6531 #define _mm_scalef_round_ss(A, B, R) \ 6532 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6533 (__v4sf)(__m128)(B), \ 6534 (__v4sf)_mm_setzero_ps(), \ 6535 (__mmask8)-1, (int)(R)) 6540 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6549 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6556 #define _mm_mask_scalef_round_ss(W, U, A, B, R) \ 6557 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6558 (__v4sf)(__m128)(B), \ 6559 (__v4sf)(__m128)(W), \ 6560 (__mmask8)(U), (int)(R)) 6565 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6572 #define _mm_maskz_scalef_round_ss(U, A, B, R) \ 6573 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \ 6574 (__v4sf)(__m128)(B), \ 6575 (__v4sf)_mm_setzero_ps(), \ 6582 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6588 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6595 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6603 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6609 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6617 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6622 #define _mm512_shuffle_f32x4(A, B, imm) \ 6623 (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \ 6624 (__v16sf)(__m512)(B), (int)(imm)) 6626 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \ 6627 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6628 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6629 (__v16sf)(__m512)(W)) 6631 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \ 6632 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6633 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \ 6634 (__v16sf)_mm512_setzero_ps()) 6636 #define _mm512_shuffle_f64x2(A, B, imm) \ 6637 (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \ 6638 (__v8df)(__m512d)(B), (int)(imm)) 6640 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \ 6641 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6642 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6643 (__v8df)(__m512d)(W)) 6645 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \ 6646 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6647 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \ 6648 (__v8df)_mm512_setzero_pd()) 6650 #define _mm512_shuffle_i32x4(A, B, imm) \ 6651 (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \ 6652 (__v16si)(__m512i)(B), (int)(imm)) 6654 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \ 6655 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 6656 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6657 (__v16si)(__m512i)(W)) 6659 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \ 6660 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 6661 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \ 6662 (__v16si)_mm512_setzero_si512()) 6664 #define _mm512_shuffle_i64x2(A, B, imm) \ 6665 (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \ 6666 (__v8di)(__m512i)(B), (int)(imm)) 6668 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \ 6669 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 6670 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6671 (__v8di)(__m512i)(W)) 6673 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \ 6674 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 6675 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \ 6676 (__v8di)_mm512_setzero_si512()) 6678 #define _mm512_shuffle_pd(A, B, M) \ 6679 (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \ 6680 (__v8df)(__m512d)(B), (int)(M)) 6682 #define _mm512_mask_shuffle_pd(W, U, A, B, M) \ 6683 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6684 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6685 (__v8df)(__m512d)(W)) 6687 #define _mm512_maskz_shuffle_pd(U, A, B, M) \ 6688 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 6689 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \ 6690 (__v8df)_mm512_setzero_pd()) 6692 #define _mm512_shuffle_ps(A, B, M) \ 6693 (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \ 6694 (__v16sf)(__m512)(B), (int)(M)) 6696 #define _mm512_mask_shuffle_ps(W, U, A, B, M) \ 6697 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6698 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6699 (__v16sf)(__m512)(W)) 6701 #define _mm512_maskz_shuffle_ps(U, A, B, M) \ 6702 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 6703 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \ 6704 (__v16sf)_mm512_setzero_ps()) 6706 #define _mm_sqrt_round_sd(A, B, R) \ 6707 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6708 (__v2df)(__m128d)(B), \ 6709 (__v2df)_mm_setzero_pd(), \ 6710 (__mmask8)-1, (int)(R)) 6715 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6722 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \ 6723 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6724 (__v2df)(__m128d)(B), \ 6725 (__v2df)(__m128d)(W), \ 6726 (__mmask8)(U), (int)(R)) 6731 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6738 #define _mm_maskz_sqrt_round_sd(U, A, B, R) \ 6739 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \ 6740 (__v2df)(__m128d)(B), \ 6741 (__v2df)_mm_setzero_pd(), \ 6742 (__mmask8)(U), (int)(R)) 6744 #define _mm_sqrt_round_ss(A, B, R) \ 6745 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6746 (__v4sf)(__m128)(B), \ 6747 (__v4sf)_mm_setzero_ps(), \ 6748 (__mmask8)-1, (int)(R)) 6753 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6760 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \ 6761 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \ 6762 (__v4sf)(__m128)(B), \ 6763 (__v4sf)(__m128)(W), (__mmask8)(U), \ 6769 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x4(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 2, 3, 0, 1, 2, 3,
                                         0, 1, 2, 3, 0, 1, 2, 3);
}
6793 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6801 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6809 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6810 0, 1, 2, 3, 0, 1, 2, 3);
6816 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6824 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6832 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6833 0, 1, 2, 3, 0, 1, 2, 3,
6834 0, 1, 2, 3, 0, 1, 2, 3);
6840 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6848 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6856 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6857 0, 1, 2, 3, 0, 1, 2, 3);
6863 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6879 return (__m512d)__builtin_ia32_selectpd_512(__M,
6887 return (__m512d)__builtin_ia32_selectpd_512(__M,
6895 return (__m512)__builtin_ia32_selectps_512(__M,
6903 return (__m512)__builtin_ia32_selectps_512(__M,
6911 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6919 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6920 (__v16qi) __O, __M);
6926 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6934 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6940 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6948 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6949 (__v16hi) __O, __M);
6955 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6963 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6969 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6977 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6978 (__v16qi) __O, __M);
6984 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6992 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
6998 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7006 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7013 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7021 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7027 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7035 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7042 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7050 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7056 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7064 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7072 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7080 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7086 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7094 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7102 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7110 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7116 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7124 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7132 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7140 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7146 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7154 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7161 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7169 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7175 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7183 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7190 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7198 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7204 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7212 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7213 (__v16qi) __O, __M);
7219 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7227 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7233 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7241 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7242 (__v16hi) __O, __M);
7248 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7256 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7262 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7270 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7271 (__v16qi) __O, __M);
7277 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7285 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7291 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7299 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7306 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7314 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7320 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7328 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7335 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7343 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7346 #define _mm512_extracti32x4_epi32(A, imm) \ 7347 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7348 (__v4si)_mm_undefined_si128(), \ 7351 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \ 7352 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7353 (__v4si)(__m128i)(W), \ 7356 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \ 7357 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ 7358 (__v4si)_mm_setzero_si128(), \ 7361 #define _mm512_extracti64x4_epi64(A, imm) \ 7362 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7363 (__v4di)_mm256_undefined_si256(), \ 7366 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \ 7367 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7368 (__v4di)(__m256i)(W), \ 7371 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) \ 7372 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ 7373 (__v4di)_mm256_setzero_si256(), \ 7376 #define _mm512_insertf64x4(A, B, imm) \ 7377 (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \ 7378 (__v4df)(__m256d)(B), (int)(imm)) 7380 #define _mm512_mask_insertf64x4(W, U, A, B, imm) \ 7381 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7382 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7383 (__v8df)(__m512d)(W)) 7385 #define _mm512_maskz_insertf64x4(U, A, B, imm) \ 7386 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 7387 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \ 7388 (__v8df)_mm512_setzero_pd()) 7390 #define _mm512_inserti64x4(A, B, imm) \ 7391 (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \ 7392 (__v4di)(__m256i)(B), (int)(imm)) 7394 #define _mm512_mask_inserti64x4(W, U, A, B, imm) \ 7395 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7396 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7397 (__v8di)(__m512i)(W)) 7399 #define _mm512_maskz_inserti64x4(U, A, B, imm) \ 7400 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 7401 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \ 7402 (__v8di)_mm512_setzero_si512()) 7404 #define _mm512_insertf32x4(A, B, imm) \ 7405 (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \ 7406 (__v4sf)(__m128)(B), (int)(imm)) 7408 #define _mm512_mask_insertf32x4(W, U, A, B, imm) \ 7409 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7410 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7411 (__v16sf)(__m512)(W)) 7413 #define _mm512_maskz_insertf32x4(U, A, B, imm) \ 7414 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 7415 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \ 7416 (__v16sf)_mm512_setzero_ps()) 7418 #define _mm512_inserti32x4(A, B, imm) \ 7419 (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \ 7420 (__v4si)(__m128i)(B), (int)(imm)) 7422 #define _mm512_mask_inserti32x4(W, U, A, B, imm) \ 7423 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7424 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7425 (__v16si)(__m512i)(W)) 7427 #define _mm512_maskz_inserti32x4(U, A, B, imm) \ 7428 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 7429 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \ 7430 (__v16si)_mm512_setzero_si512()) 7432 #define _mm512_getmant_round_pd(A, B, C, R) \ 7433 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7434 (int)(((C)<<2) | (B)), \ 7435 (__v8df)_mm512_undefined_pd(), \ 7436 (__mmask8)-1, (int)(R)) 7438 #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \ 7439 
(__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7440 (int)(((C)<<2) | (B)), \ 7441 (__v8df)(__m512d)(W), \ 7442 (__mmask8)(U), (int)(R)) 7444 #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \ 7445 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7446 (int)(((C)<<2) | (B)), \ 7447 (__v8df)_mm512_setzero_pd(), \ 7448 (__mmask8)(U), (int)(R)) 7450 #define _mm512_getmant_pd(A, B, C) \ 7451 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7452 (int)(((C)<<2) | (B)), \ 7453 (__v8df)_mm512_setzero_pd(), \ 7455 _MM_FROUND_CUR_DIRECTION) 7457 #define _mm512_mask_getmant_pd(W, U, A, B, C) \ 7458 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7459 (int)(((C)<<2) | (B)), \ 7460 (__v8df)(__m512d)(W), \ 7462 _MM_FROUND_CUR_DIRECTION) 7464 #define _mm512_maskz_getmant_pd(U, A, B, C) \ 7465 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \ 7466 (int)(((C)<<2) | (B)), \ 7467 (__v8df)_mm512_setzero_pd(), \ 7469 _MM_FROUND_CUR_DIRECTION) 7471 #define _mm512_getmant_round_ps(A, B, C, R) \ 7472 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7473 (int)(((C)<<2) | (B)), \ 7474 (__v16sf)_mm512_undefined_ps(), \ 7475 (__mmask16)-1, (int)(R)) 7477 #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \ 7478 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7479 (int)(((C)<<2) | (B)), \ 7480 (__v16sf)(__m512)(W), \ 7481 (__mmask16)(U), (int)(R)) 7483 #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \ 7484 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7485 (int)(((C)<<2) | (B)), \ 7486 (__v16sf)_mm512_setzero_ps(), \ 7487 (__mmask16)(U), (int)(R)) 7489 #define _mm512_getmant_ps(A, B, C) \ 7490 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7491 (int)(((C)<<2)|(B)), \ 7492 (__v16sf)_mm512_undefined_ps(), \ 7494 _MM_FROUND_CUR_DIRECTION) 7496 #define _mm512_mask_getmant_ps(W, U, A, B, C) \ 7497 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7498 (int)(((C)<<2)|(B)), \ 7499 (__v16sf)(__m512)(W), \ 7501 _MM_FROUND_CUR_DIRECTION) 7503 #define _mm512_maskz_getmant_ps(U, A, B, C) \ 7504 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \ 7505 (int)(((C)<<2)|(B)), \ 7506 (__v16sf)_mm512_setzero_ps(), \ 7508 _MM_FROUND_CUR_DIRECTION) 7510 #define _mm512_getexp_round_pd(A, R) \ 7511 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7512 (__v8df)_mm512_undefined_pd(), \ 7513 (__mmask8)-1, (int)(R)) 7515 #define _mm512_mask_getexp_round_pd(W, U, A, R) \ 7516 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7517 (__v8df)(__m512d)(W), \ 7518 (__mmask8)(U), (int)(R)) 7520 #define _mm512_maskz_getexp_round_pd(U, A, R) \ 7521 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 7522 (__v8df)_mm512_setzero_pd(), \ 7523 (__mmask8)(U), (int)(R)) 7528 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7537 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7546 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7552 #define _mm512_getexp_round_ps(A, R) \ 7553 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7554 (__v16sf)_mm512_undefined_ps(), \ 7555 (__mmask16)-1, (int)(R)) 7557 #define _mm512_mask_getexp_round_ps(W, U, A, R) \ 7558 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7559 (__v16sf)(__m512)(W), \ 7560 (__mmask16)(U), (int)(R)) 7562 #define _mm512_maskz_getexp_round_ps(U, A, R) \ 7563 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 7564 (__v16sf)_mm512_setzero_ps(), \ 7565 (__mmask16)(U), (int)(R)) 7570 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7579 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7588 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7594 #define _mm512_i64gather_ps(index, addr, scale) \ 7595 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \ 7596 (float const *)(addr), \ 7597 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7600 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7601 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\ 7602 (float const *)(addr), \ 7603 (__v8di)(__m512i)(index), \ 7604 (__mmask8)(mask), (int)(scale)) 7606 #define _mm512_i64gather_epi32(index, addr, scale) \ 7607 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \ 7608 (int const *)(addr), \ 7609 (__v8di)(__m512i)(index), \ 7610 (__mmask8)-1, (int)(scale)) 7612 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 7613 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \ 7614 (int const *)(addr), \ 7615 (__v8di)(__m512i)(index), \ 7616 (__mmask8)(mask), (int)(scale)) 7618 #define _mm512_i64gather_pd(index, addr, scale) \ 7619 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \ 7620 (double const *)(addr), \ 7621 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7624 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7625 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \ 7626 (double const *)(addr), \ 7627 (__v8di)(__m512i)(index), \ 7628 (__mmask8)(mask), (int)(scale)) 7630 #define _mm512_i64gather_epi64(index, addr, scale) \ 7631 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \ 7632 (long long const *)(addr), \ 7633 (__v8di)(__m512i)(index), (__mmask8)-1, \ 7636 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7637 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \ 7638 (long long const *)(addr), \ 7639 (__v8di)(__m512i)(index), \ 7640 (__mmask8)(mask), (int)(scale)) 7642 #define _mm512_i32gather_ps(index, addr, scale) \ 7643 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \ 7644 (float const *)(addr), \ 7645 (__v16sf)(__m512)(index), \ 7646 (__mmask16)-1, (int)(scale)) 7648 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \ 7649 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \ 7650 (float const *)(addr), \ 7651 (__v16sf)(__m512)(index), \ 7652 (__mmask16)(mask), (int)(scale)) 7654 #define _mm512_i32gather_epi32(index, addr, scale) \ 7655 (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \ 7656 (int const *)(addr), \ 7657 (__v16si)(__m512i)(index), \ 7658 (__mmask16)-1, (int)(scale)) 7660 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 7661 (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \ 7662 (int const *)(addr), \ 7663 (__v16si)(__m512i)(index), \ 7664 (__mmask16)(mask), (int)(scale)) 7666 #define _mm512_i32gather_pd(index, addr, scale) \ 7667 (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \ 7668 (double const *)(addr), \ 7669 (__v8si)(__m256i)(index), (__mmask8)-1, \ 7672 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \ 7673 (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \ 7674 (double const *)(addr), \ 7675 (__v8si)(__m256i)(index), \ 7676 (__mmask8)(mask), (int)(scale)) 7678 #define _mm512_i32gather_epi64(index, addr, scale) \ 7679 (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \ 7680 (long long const *)(addr), \ 7681 (__v8si)(__m256i)(index), (__mmask8)-1, \ 7684 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) 
\ 7685 (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \ 7686 (long long const *)(addr), \ 7687 (__v8si)(__m256i)(index), \ 7688 (__mmask8)(mask), (int)(scale)) 7690 #define _mm512_i64scatter_ps(addr, index, v1, scale) \ 7691 __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)-1, \ 7692 (__v8di)(__m512i)(index), \ 7693 (__v8sf)(__m256)(v1), (int)(scale)) 7695 #define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 7696 __builtin_ia32_scatterdiv16sf((float *)(addr), (__mmask8)(mask), \ 7697 (__v8di)(__m512i)(index), \ 7698 (__v8sf)(__m256)(v1), (int)(scale)) 7700 #define _mm512_i64scatter_epi32(addr, index, v1, scale) \ 7701 __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)-1, \ 7702 (__v8di)(__m512i)(index), \ 7703 (__v8si)(__m256i)(v1), (int)(scale)) 7705 #define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 7706 __builtin_ia32_scatterdiv16si((int *)(addr), (__mmask8)(mask), \ 7707 (__v8di)(__m512i)(index), \ 7708 (__v8si)(__m256i)(v1), (int)(scale)) 7710 #define _mm512_i64scatter_pd(addr, index, v1, scale) \ 7711 __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)-1, \ 7712 (__v8di)(__m512i)(index), \ 7713 (__v8df)(__m512d)(v1), (int)(scale)) 7715 #define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 7716 __builtin_ia32_scatterdiv8df((double *)(addr), (__mmask8)(mask), \ 7717 (__v8di)(__m512i)(index), \ 7718 (__v8df)(__m512d)(v1), (int)(scale)) 7720 #define _mm512_i64scatter_epi64(addr, index, v1, scale) \ 7721 __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)-1, \ 7722 (__v8di)(__m512i)(index), \ 7723 (__v8di)(__m512i)(v1), (int)(scale)) 7725 #define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 7726 __builtin_ia32_scatterdiv8di((long long *)(addr), (__mmask8)(mask), \ 7727 (__v8di)(__m512i)(index), \ 7728 (__v8di)(__m512i)(v1), (int)(scale)) 7730 #define _mm512_i32scatter_ps(addr, index, v1, scale) \ 7731 __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)-1, \ 7732 (__v16si)(__m512i)(index), \ 7733 (__v16sf)(__m512)(v1), (int)(scale)) 7735 #define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 7736 __builtin_ia32_scattersiv16sf((float *)(addr), (__mmask16)(mask), \ 7737 (__v16si)(__m512i)(index), \ 7738 (__v16sf)(__m512)(v1), (int)(scale)) 7740 #define _mm512_i32scatter_epi32(addr, index, v1, scale) \ 7741 __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)-1, \ 7742 (__v16si)(__m512i)(index), \ 7743 (__v16si)(__m512i)(v1), (int)(scale)) 7745 #define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 7746 __builtin_ia32_scattersiv16si((int *)(addr), (__mmask16)(mask), \ 7747 (__v16si)(__m512i)(index), \ 7748 (__v16si)(__m512i)(v1), (int)(scale)) 7750 #define _mm512_i32scatter_pd(addr, index, v1, scale) \ 7751 __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)-1, \ 7752 (__v8si)(__m256i)(index), \ 7753 (__v8df)(__m512d)(v1), (int)(scale)) 7755 #define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 7756 __builtin_ia32_scattersiv8df((double *)(addr), (__mmask8)(mask), \ 7757 (__v8si)(__m256i)(index), \ 7758 (__v8df)(__m512d)(v1), (int)(scale)) 7760 #define _mm512_i32scatter_epi64(addr, index, v1, scale) \ 7761 __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)-1, \ 7762 (__v8si)(__m256i)(index), \ 7763 (__v8di)(__m512i)(v1), (int)(scale)) 7765 #define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 7766 __builtin_ia32_scattersiv8di((long long *)(addr), (__mmask8)(mask), \ 7767 (__v8si)(__m256i)(index), \ 7768 
(__v8di)(__m512i)(v1), (int)(scale)) 7773 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7780 #define _mm_fmadd_round_ss(A, B, C, R) \ 7781 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7782 (__v4sf)(__m128)(B), \ 7783 (__v4sf)(__m128)(C), (__mmask8)-1, \ 7786 #define _mm_mask_fmadd_round_ss(W, U, A, B, R) \ 7787 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7788 (__v4sf)(__m128)(A), \ 7789 (__v4sf)(__m128)(B), (__mmask8)(U), \ 7795 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7802 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \ 7803 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7804 (__v4sf)(__m128)(B), \ 7805 (__v4sf)(__m128)(C), (__mmask8)(U), \ 7811 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7818 #define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \ 7819 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 7820 (__v4sf)(__m128)(X), \ 7821 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7827 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7834 #define _mm_fmsub_round_ss(A, B, C, R) \ 7835 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7836 (__v4sf)(__m128)(B), \ 7837 -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7840 #define _mm_mask_fmsub_round_ss(W, U, A, B, R) \ 7841 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7842 (__v4sf)(__m128)(A), \ 7843 -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7849 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7856 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \ 7857 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7858 (__v4sf)(__m128)(B), \ 7859 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 7865 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7872 #define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \ 7873 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 7874 (__v4sf)(__m128)(X), \ 7875 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7881 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7888 #define _mm_fnmadd_round_ss(A, B, C, R) \ 7889 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7890 -(__v4sf)(__m128)(B), \ 7891 (__v4sf)(__m128)(C), (__mmask8)-1, \ 7894 #define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \ 7895 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7896 -(__v4sf)(__m128)(A), \ 7897 (__v4sf)(__m128)(B), (__mmask8)(U), \ 7903 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7910 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \ 7911 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7912 -(__v4sf)(__m128)(B), \ 7913 (__v4sf)(__m128)(C), (__mmask8)(U), \ 7919 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7926 #define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \ 7927 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \ 7928 -(__v4sf)(__m128)(X), \ 7929 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7935 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7942 #define _mm_fnmsub_round_ss(A, B, C, R) \ 7943 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \ 7944 -(__v4sf)(__m128)(B), \ 7945 -(__v4sf)(__m128)(C), (__mmask8)-1, \ 7948 #define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \ 7949 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \ 7950 -(__v4sf)(__m128)(A), \ 7951 -(__v4sf)(__m128)(B), (__mmask8)(U), \ 7957 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7964 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \ 7965 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \ 7966 -(__v4sf)(__m128)(B), \ 7967 -(__v4sf)(__m128)(C), (__mmask8)(U), \ 7973 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7980 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \ 7981 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \ 7982 -(__v4sf)(__m128)(X), \ 7983 (__v4sf)(__m128)(Y), (__mmask8)(U), \ 7989 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
7996 #define _mm_fmadd_round_sd(A, B, C, R) \ 7997 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 7998 (__v2df)(__m128d)(B), \ 7999 (__v2df)(__m128d)(C), (__mmask8)-1, \ 8002 #define _mm_mask_fmadd_round_sd(W, U, A, B, R) \ 8003 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8004 (__v2df)(__m128d)(A), \ 8005 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8011 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8018 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \ 8019 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8020 (__v2df)(__m128d)(B), \ 8021 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8027 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8034 #define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \ 8035 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8036 (__v2df)(__m128d)(X), \ 8037 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8043 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8050 #define _mm_fmsub_round_sd(A, B, C, R) \ 8051 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8052 (__v2df)(__m128d)(B), \ 8053 -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8056 #define _mm_mask_fmsub_round_sd(W, U, A, B, R) \ 8057 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8058 (__v2df)(__m128d)(A), \ 8059 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8065 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8072 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \ 8073 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8074 (__v2df)(__m128d)(B), \ 8075 -(__v2df)(__m128d)(C), \ 8076 (__mmask8)(U), (int)(R)) 8081 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8088 #define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \ 8089 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8090 (__v2df)(__m128d)(X), \ 8091 (__v2df)(__m128d)(Y), \ 8092 (__mmask8)(U), (int)(R)) 8097 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8104 #define _mm_fnmadd_round_sd(A, B, C, R) \ 8105 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8106 -(__v2df)(__m128d)(B), \ 8107 (__v2df)(__m128d)(C), (__mmask8)-1, \ 8110 #define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \ 8111 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8112 -(__v2df)(__m128d)(A), \ 8113 (__v2df)(__m128d)(B), (__mmask8)(U), \ 8119 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8126 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \ 8127 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8128 -(__v2df)(__m128d)(B), \ 8129 (__v2df)(__m128d)(C), (__mmask8)(U), \ 8135 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8142 #define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \ 8143 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \ 8144 -(__v2df)(__m128d)(X), \ 8145 (__v2df)(__m128d)(Y), (__mmask8)(U), \ 8151 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8158 #define _mm_fnmsub_round_sd(A, B, C, R) \ 8159 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \ 8160 -(__v2df)(__m128d)(B), \ 8161 -(__v2df)(__m128d)(C), (__mmask8)-1, \ 8164 #define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \ 8165 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \ 8166 -(__v2df)(__m128d)(A), \ 8167 -(__v2df)(__m128d)(B), (__mmask8)(U), \ 8173 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8180 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \ 8181 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \ 8182 -(__v2df)(__m128d)(B), \ 8183 -(__v2df)(__m128d)(C), \ 8190 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8197 #define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \ 8198 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \ 8199 -(__v2df)(__m128d)(X), \ 8200 (__v2df)(__m128d)(Y), \ 8201 (__mmask8)(U), (int)(R)) 8203 #define _mm512_permutex_pd(X, C) \ 8204 (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)) 8206 #define _mm512_mask_permutex_pd(W, U, X, C) \ 8207 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8208 (__v8df)_mm512_permutex_pd((X), (C)), \ 8209 (__v8df)(__m512d)(W)) 8211 #define _mm512_maskz_permutex_pd(U, X, C) \ 8212 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ 8213 (__v8df)_mm512_permutex_pd((X), (C)), \ 8214 (__v8df)_mm512_setzero_pd()) 8216 #define _mm512_permutex_epi64(X, C) \ 8217 (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)) 8219 #define _mm512_mask_permutex_epi64(W, U, X, C) \ 8220 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8221 (__v8di)_mm512_permutex_epi64((X), (C)), \ 8222 (__v8di)(__m512i)(W)) 8224 #define _mm512_maskz_permutex_epi64(U, X, C) \ 8225 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 8226 (__v8di)_mm512_permutex_epi64((X), (C)), \ 8227 (__v8di)_mm512_setzero_si512()) 8232 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8238 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8246 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8254 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8260 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8269 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
8277 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8283 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8291 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8299 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
8302 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 8307 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8316 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8321 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 8326 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8332 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8338 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8344 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8350 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8356 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8362 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8368 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8375 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8382 return (__m512i) __builtin_nontemporal_load((
const __v8di_aligned *)__P);
8389 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8395 typedef __v16sf __v16sf_aligned
__attribute__((aligned(64)));
8396 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8402 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8410 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8419 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8427 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8436 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8444 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8453 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8461 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8467 #define _mm_cmp_round_ss_mask(X, Y, P, R) \ 8468 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8469 (__v4sf)(__m128)(Y), (int)(P), \ 8470 (__mmask8)-1, (int)(R)) 8472 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ 8473 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8474 (__v4sf)(__m128)(Y), (int)(P), \ 8475 (__mmask8)(M), (int)(R)) 8477 #define _mm_cmp_ss_mask(X, Y, P) \ 8478 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8479 (__v4sf)(__m128)(Y), (int)(P), \ 8481 _MM_FROUND_CUR_DIRECTION) 8483 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \ 8484 (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \ 8485 (__v4sf)(__m128)(Y), (int)(P), \ 8487 _MM_FROUND_CUR_DIRECTION) 8489 #define _mm_cmp_round_sd_mask(X, Y, P, R) \ 8490 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8491 (__v2df)(__m128d)(Y), (int)(P), \ 8492 (__mmask8)-1, (int)(R)) 8494 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 8495 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8496 (__v2df)(__m128d)(Y), (int)(P), \ 8497 (__mmask8)(M), (int)(R)) 8499 #define _mm_cmp_sd_mask(X, Y, P) \ 8500 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8501 (__v2df)(__m128d)(Y), (int)(P), \ 8503 _MM_FROUND_CUR_DIRECTION) 8505 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \ 8506 (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \ 8507 (__v2df)(__m128d)(Y), (int)(P), \ 8509 _MM_FROUND_CUR_DIRECTION) 8572 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8573 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8579 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8587 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8595 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8596 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8602 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8610 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8618 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), __W);
8624 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B),
8631 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), __W);
8637 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B),
8644 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8650 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8656 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8660 return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
8666 return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
8674 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8678 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
8684 return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
8689 #define _mm512_shuffle_epi32(A, I) \ 8690 (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)) 8692 #define _mm512_mask_shuffle_epi32(W, U, A, I) \ 8693 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 8694 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8695 (__v16si)(__m512i)(W)) 8697 #define _mm512_maskz_shuffle_epi32(U, A, I) \ 8698 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 8699 (__v16si)_mm512_shuffle_epi32((A), (I)), \ 8700 (__v16si)_mm512_setzero_si512()) 8705 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8713 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8721 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8729 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8737 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)__P,
8745 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)__P,
8753 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)__P,
8761 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)__P,
8769 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)__P,
8777 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)__P,
8785 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)__P,
8793 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)__P,
8801 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8809 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8817 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8825 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8830 #define _mm512_cvt_roundps_pd(A, R) \ 8831 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8832 (__v8df)_mm512_undefined_pd(), \ 8833 (__mmask8)-1, (int)(R)) 8835 #define _mm512_mask_cvt_roundps_pd(W, U, A, R) \ 8836 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8837 (__v8df)(__m512d)(W), \ 8838 (__mmask8)(U), (int)(R)) 8840 #define _mm512_maskz_cvt_roundps_pd(U, A, R) \ 8841 (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \ 8842 (__v8df)_mm512_setzero_pd(), \ 8843 (__mmask8)(U), (int)(R)) 8848 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8854 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8862 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8882 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8890 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8898 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8906 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8914 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8921 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8928 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
8935 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
8939 #define _mm_cvt_roundsd_ss(A, B, R) \ 8940 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 8941 (__v2df)(__m128d)(B), \ 8942 (__v4sf)_mm_undefined_ps(), \ 8943 (__mmask8)-1, (int)(R)) 8945 #define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \ 8946 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 8947 (__v2df)(__m128d)(B), \ 8948 (__v4sf)(__m128)(W), \ 8949 (__mmask8)(U), (int)(R)) 8951 #define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \ 8952 (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \ 8953 (__v2df)(__m128d)(B), \ 8954 (__v4sf)_mm_setzero_ps(), \ 8955 (__mmask8)(U), (int)(R)) 8960 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8969 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
8975 #define _mm_cvtss_i32 _mm_cvtss_si32 8976 #define _mm_cvtsd_i32 _mm_cvtsd_si32 8977 #define _mm_cvti32_sd _mm_cvtsi32_sd 8978 #define _mm_cvti32_ss _mm_cvtsi32_ss 8980 #define _mm_cvtss_i64 _mm_cvtss_si64 8981 #define _mm_cvtsd_i64 _mm_cvtsd_si64 8982 #define _mm_cvti64_sd _mm_cvtsi64_sd 8983 #define _mm_cvti64_ss _mm_cvtsi64_ss 8987 #define _mm_cvt_roundi64_sd(A, B, R) \ 8988 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 8991 #define _mm_cvt_roundsi64_sd(A, B, R) \ 8992 (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \ 8996 #define _mm_cvt_roundsi32_ss(A, B, R) \ 8997 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) 8999 #define _mm_cvt_roundi32_ss(A, B, R) \ 9000 (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)) 9003 #define _mm_cvt_roundsi64_ss(A, B, R) \ 9004 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9007 #define _mm_cvt_roundi64_ss(A, B, R) \ 9008 (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \ 9012 #define _mm_cvt_roundss_sd(A, B, R) \ 9013 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9014 (__v4sf)(__m128)(B), \ 9015 (__v2df)_mm_undefined_pd(), \ 9016 (__mmask8)-1, (int)(R)) 9018 #define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \ 9019 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9020 (__v4sf)(__m128)(B), \ 9021 (__v2df)(__m128d)(W), \ 9022 (__mmask8)(U), (int)(R)) 9024 #define _mm_maskz_cvt_roundss_sd(U, A, B, R) \ 9025 (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \ 9026 (__v4sf)(__m128)(B), \ 9027 (__v2df)_mm_setzero_pd(), \ 9028 (__mmask8)(U), (int)(R)) 9033 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9042 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9056 #define _mm_cvt_roundu64_sd(A, B, R) \ 9057 (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ 9058 (unsigned long long)(B), (int)(R)) 9061 _mm_cvtu64_sd (__m128d __A,
unsigned long long __B)
9068 #define _mm_cvt_roundu32_ss(A, B, R) \ 9069 (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ 9080 #define _mm_cvt_roundu64_ss(A, B, R) \ 9081 (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ 9082 (unsigned long long)(B), (int)(R)) 9085 _mm_cvtu64_ss (__m128 __A,
unsigned long long __B)
9095 return (__m512i) __builtin_ia32_selectd_512(__M,
9103 return (__m512i) __builtin_ia32_selectq_512(__M,
9110 char __e58,
char __e57,
char __e56,
char __e55,
char __e54,
char __e53,
9111 char __e52,
char __e51,
char __e50,
char __e49,
char __e48,
char __e47,
9112 char __e46,
char __e45,
char __e44,
char __e43,
char __e42,
char __e41,
9113 char __e40,
char __e39,
char __e38,
char __e37,
char __e36,
char __e35,
9114 char __e34,
char __e33,
char __e32,
char __e31,
char __e30,
char __e29,
9115 char __e28,
char __e27,
char __e26,
char __e25,
char __e24,
char __e23,
9116 char __e22,
char __e21,
char __e20,
char __e19,
char __e18,
char __e17,
9117 char __e16,
char __e15,
char __e14,
char __e13,
char __e12,
char __e11,
9118 char __e10,
char __e9,
char __e8,
char __e7,
char __e6,
char __e5,
9119 char __e4,
char __e3,
char __e2,
char __e1,
char __e0) {
9121 return __extension__ (__m512i)(__v64qi)
9122 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9123 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9124 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9125 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9126 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9127 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9128 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9129 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9134 short __e27,
short __e26,
short __e25,
short __e24,
short __e23,
9135 short __e22,
short __e21,
short __e20,
short __e19,
short __e18,
9136 short __e17,
short __e16,
short __e15,
short __e14,
short __e13,
9137 short __e12,
short __e11,
short __e10,
short __e9,
short __e8,
9138 short __e7,
short __e6,
short __e5,
short __e4,
short __e3,
9139 short __e2,
short __e1,
short __e0) {
9140 return __extension__ (__m512i)(__v32hi)
9141 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9142 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9143 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9144 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9149 int __E,
int __F,
int __G,
int __H,
9150 int __I,
int __J,
int __K,
int __L,
9151 int __M,
int __N,
int __O,
int __P)
9153 return __extension__ (__m512i)(__v16si)
9154 { __P, __O, __N, __M, __L, __K, __J, __I,
9155 __H, __G, __F, __E, __D, __C, __B, __A };
9158 #define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 9159 e8,e9,e10,e11,e12,e13,e14,e15) \ 9160 _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ 9161 (e5),(e4),(e3),(e2),(e1),(e0)) 9165 long long __D,
long long __E,
long long __F,
9166 long long __G,
long long __H)
9168 return __extension__ (__m512i) (__v8di)
9169 { __H, __G, __F, __E, __D, __C, __B, __A };
9172 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 9173 _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9177 double __E,
double __F,
double __G,
double __H)
9179 return __extension__ (__m512d)
9180 { __H, __G, __F, __E, __D, __C, __B, __A };
9183 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 9184 _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) 9188 float __E,
float __F,
float __G,
float __H,
9189 float __I,
float __J,
float __K,
float __L,
9190 float __M,
float __N,
float __O,
float __P)
9192 return __extension__ (__m512)
9193 { __P, __O, __N, __M, __L, __K, __J, __I,
9194 __H, __G, __F, __E, __D, __C, __B, __A };
9197 #define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 9198 _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ 9199 (e4),(e3),(e2),(e1),(e0)) 9235 #define _mm512_mask_reduce_operator(op) \ 9236 __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \ 9237 __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \ 9238 __m256i __t3 = (__m256i)(__t1 op __t2); \ 9239 __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \ 9240 __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \ 9241 __v2du __t6 = __t4 op __t5; \ 9242 __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9243 __v2du __t8 = __t6 op __t7; \ 9285 #undef _mm512_mask_reduce_operator 9287 #define _mm512_mask_reduce_operator(op) \ 9288 __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \ 9289 __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \ 9290 __m256d __t3 = __t1 op __t2; \ 9291 __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ 9292 __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ 9293 __m128d __t6 = __t4 op __t5; \ 9294 __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9295 __m128d __t8 = __t6 op __t7; \ 9317 #undef _mm512_mask_reduce_operator 9319 #define _mm512_mask_reduce_operator(op) \ 9320 __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \ 9321 __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \ 9322 __m256i __t3 = (__m256i)(__t1 op __t2); \ 9323 __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \ 9324 __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \ 9325 __v4su __t6 = __t4 op __t5; \ 9326 __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9327 __v4su __t8 = __t6 op __t7; \ 9328 __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9329 __v4su __t10 = __t8 op __t9; \ 9375 #undef _mm512_mask_reduce_operator 9377 #define _mm512_mask_reduce_operator(op) \ 9378 __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \ 9379 __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \ 9380 __m256 __t3 = __t1 op __t2; \ 9381 __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ 9382 __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ 9383 __m128 __t6 = __t4 op __t5; \ 9384 __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9385 __m128 __t8 = __t6 op __t7; \ 9386 __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9387 __m128 __t10 = __t8 op __t9; \ 9411 #undef _mm512_mask_reduce_operator 9413 #define _mm512_mask_reduce_operator(op) \ 9414 __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \ 9415 __m512i __t2 = _mm512_##op(__V, __t1); \ 9416 __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \ 9417 __m512i __t4 = _mm512_##op(__t2, __t3); \ 9418 __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \ 9419 __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ 9465 #undef _mm512_mask_reduce_operator 9467 #define _mm512_mask_reduce_operator(op) \ 9468 __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \ 9469 __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \ 9470 __m256i __t3 = _mm256_##op(__t1, __t2); \ 9471 __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \ 9472 __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \ 9473 __m128i __t6 = _mm_##op(__t4, __t5); \ 9474 __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \ 9475 __m128i __t8 = _mm_##op(__t6, __t7); \ 9476 __m128i __t9 = 
(__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \ 9477 __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \ 9523 #undef _mm512_mask_reduce_operator 9525 #define _mm512_mask_reduce_operator(op) \ 9526 __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \ 9527 __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \ 9528 __m256d __t3 = _mm256_##op(__t1, __t2); \ 9529 __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ 9530 __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ 9531 __m128d __t6 = _mm_##op(__t4, __t5); \ 9532 __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ 9533 __m128d __t8 = _mm_##op(__t6, __t7); \ 9557 #undef _mm512_mask_reduce_operator 9559 #define _mm512_mask_reduce_operator(op) \ 9560 __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \ 9561 __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \ 9562 __m256 __t3 = _mm256_##op(__t1, __t2); \ 9563 __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ 9564 __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ 9565 __m128 __t6 = _mm_##op(__t4, __t5); \ 9566 __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ 9567 __m128 __t8 = _mm_##op(__t6, __t7); \ 9568 __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ 9569 __m128 __t10 = _mm_##op(__t8, __t9); \ 9593 #undef _mm512_mask_reduce_operator 9595 #undef __DEFAULT_FN_ATTRS512 9596 #undef __DEFAULT_FN_ATTRS128 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
#define __DEFAULT_FN_ATTRS512
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
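The reduce intrinsics lower to a short shuffle-and-add sequence rather than a single instruction. A minimal sketch (values and the helper name are illustrative; assumes a compiler targeting AVX-512F, e.g. -mavx512f):

#include <immintrin.h>

double sum_eight_lanes(void) {                 /* hypothetical helper name */
    __m512d v = _mm512_set_pd(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
    return _mm512_reduce_add_pd(v);            /* 1 + 2 + ... + 8 = 36.0 */
}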
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the parameter; the upper 384 bits are set to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
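The mask/maskz naming convention used throughout this header: _mm512_mask_* takes a source vector whose lanes pass through wherever the corresponding mask bit is clear, while _mm512_maskz_* zeroes those lanes instead. A sketch with illustrative values (assumes -mavx512f):

#include <immintrin.h>

void mask_vs_maskz(void) {                           /* hypothetical helper name */
    __m512i a   = _mm512_set1_epi32(1);
    __m512i b   = _mm512_set1_epi32(2);
    __m512i src = _mm512_set1_epi32(-1);
    __mmask16 k = 0x00FF;                            /* select the low 8 lanes */

    __m512i m = _mm512_mask_add_epi32(src, k, a, b); /* low 8 lanes = 3, rest = -1 */
    __m512i z = _mm512_maskz_add_epi32(k, a, b);     /* low 8 lanes = 3, rest = 0 */
    (void)m; (void)z;
}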
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the result; the upper 64 bits of the result are copied from the first operand.
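A sketch pairing the plain scalar add with the masked form listed above (_mm_mask_add_sd); values and names are illustrative, and the masked form requires AVX-512F:

#include <immintrin.h>

void add_sd_demo(void) {
    __m128d a = _mm_set_pd(2.0, 1.0);              /* a = {1.0, 2.0} */
    __m128d b = _mm_set_pd(0.0, 10.0);
    __m128d w = _mm_set_pd(0.0, -1.0);

    __m128d r0 = _mm_add_sd(a, b);                 /* {11.0, 2.0} */
    __m128d r1 = _mm_mask_add_sd(w, 0x1, a, b);    /* mask bit set: {11.0, 2.0} */
    __m128d r2 = _mm_mask_add_sd(w, 0x0, a, b);    /* mask bit clear: low lane from w, {-1.0, 2.0} */
    (void)r0; (void)r1; (void)r2;
}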
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float]. The lower 32 bits are set to the lower 32 bits of the second parameter; the upper 96 bits are set to the upper 96 bits of the first parameter.
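A sketch of the blend this performs (values illustrative):

#include <immintrin.h>

void move_ss_demo(void) {
    __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);  /* {1, 2, 3, 4} */
    __m128 b = _mm_set_ps(0.0f, 0.0f, 0.0f, 9.0f);  /* {9, 0, 0, 0} */
    __m128 r = _mm_move_ss(a, b);                   /* {9, 2, 3, 4} */
    (void)r;
}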
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
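A sketch contrasting the plain multiply with the zero-masked variant listed above (_mm_maskz_mul_ss, which requires AVX-512F); values are illustrative:

#include <immintrin.h>

void mul_ss_demo(void) {
    __m128 a = _mm_set_ps(0.0f, 0.0f, 0.0f, 3.0f);  /* low lane 3.0 */
    __m128 b = _mm_set_ps(0.0f, 0.0f, 0.0f, 4.0f);  /* low lane 4.0 */

    __m128 r = _mm_mul_ss(a, b);               /* r[0] = 12.0, r[1..3] from a */
    __m128 z = _mm_maskz_mul_ss(0x0, a, b);    /* clear mask bit zeroes z[0] */
    (void)r; (void)z;
}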
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
#define _mm512_mask_reduce_operator(op)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
typedef char __v64qi __attribute__((__vector_size__(64)));
static __inline void __DEFAULT_FN_ATTRS _mm256_zeroupper(void)
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
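A common reason to materialize a zero vector is seeding an accumulator. A minimal sketch (the helper name and the assumption that n is a multiple of 4 are ours, not the header's):

#include <immintrin.h>

float sum_floats(const float *x, int n) {      /* n assumed divisible by 4 */
    __m128 acc = _mm_setzero_ps();
    for (int i = 0; i < n; i += 4)
        acc = _mm_add_ps(acc, _mm_loadu_ps(x + i));
    float lanes[4];
    _mm_storeu_ps(lanes, acc);
    return lanes[0] + lanes[1] + lanes[2] + lanes[3];
}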
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask2int(__mmask16 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of the second operand and returns the quotient in the lower 64 bits of the result; the upper 64 bits of the result are copied from the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4 x double]. The lower 256 bits contain the value of the parameter; the upper 256 bits are set to zero.
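The zext casts differ from the plain casts (e.g. _mm512_castpd256_pd512, also in this listing) in that the upper bits are guaranteed zero rather than left undefined. A sketch (assumes -mavx512f; helper name illustrative):

#include <immintrin.h>

void zext_vs_cast(void) {                     /* hypothetical helper name */
    __m256d lo = _mm256_set1_pd(1.0);
    __m512d z = _mm512_zextpd256_pd512(lo);   /* upper four doubles are 0.0 */
    __m512d c = _mm512_castpd256_pd512(lo);   /* upper four doubles undefined */
    (void)z; (void)c;
}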
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(__m512i *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the second operand and returns the quotient in the low-order bits of the result; the upper 96 bits of the result are copied from the first operand.
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(double *__P, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bits of the result; the upper 64 bits of the result are copied from the first operand.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision value of the first operand and returns the difference in the lower 64 bits of the result; the upper 64 bits of the result are copied from the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kmov(__mmask16 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
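A sketch of one common use, building a lane mask by comparing against zero (values illustrative):

#include <immintrin.h>

void zero_compare_demo(void) {
    __m128i z = _mm_setzero_si128();            /* all 128 bits clear */
    __m128i v = _mm_set_epi32(0, 5, 0, 7);
    __m128i is_zero = _mm_cmpeq_epi32(v, z);    /* all-ones in lanes equal to 0 */
    (void)is_zero;
}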
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64(__m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector; the upper 256 bits of the result are zeroed.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector; the upper 384 bits of the result are zeroed.
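Unlike the _mm512_castsi*_si512 casts, which leave the upper bits unspecified, the zext variants guarantee zeroes there. A sketch of the difference:

#include <immintrin.h>

__m512i widen(__m256i v)
{
    __m512i z = _mm512_zextsi256_si512(v); /* upper 256 bits guaranteed zero */
    __m512i c = _mm512_castsi256_si512(v); /* upper 256 bits unspecified */
    /* Keep the defined low half of c and take the (zero) high half from z;
       the result is identical to z. */
    return _mm512_mask_mov_epi64(z, (__mmask8)0x0F, c);
}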
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
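Note the argument order of the set functions: the first argument initializes the highest-numbered element, so element 0 of the result comes from the last argument. For example:

#include <immintrin.h>

__m512i counting(void)
{
    /* __A..__H map to elements 7..0, so element i of the result is i. */
    return _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
}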
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
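An unspecified-content vector serves as a don't-care input that the compiler need not spend an instruction initializing; every lane must be overwritten (or made irrelevant) before the value matters. One common idiom, sketched here, builds an all-ones vector by comparing the register with itself:

#include <immintrin.h>

__m128i all_ones(void)
{
    __m128i u = _mm_undefined_si128();
    return _mm_cmpeq_epi32(u, u); /* x == x in every lane, whatever x is */
}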
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
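Compress-store writes only the active lanes, packed contiguously, to an unaligned address, which makes it a one-instruction filtered append. A sketch (keep_nonnegative is a hypothetical helper; it returns the number of elements written):

#include <immintrin.h>
#include <stddef.h>

size_t keep_nonnegative(long long *dst, __m512i v)
{
    __mmask8 m = _mm512_cmpge_epi64_mask(v, _mm512_setzero_si512());
    _mm512_mask_compressstoreu_epi64(dst, m, v);
    return (size_t)__builtin_popcount((unsigned)m);
}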
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float]; the upper 384 bits of the result are zeroed.
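As with the integer zext functions above, this guarantees zeroed upper lanes, whereas _mm512_castps128_ps512 leaves them unspecified:

#include <immintrin.h>

__m512 widen_ps(__m128 v)
{
    return _mm512_zextps128_ps512(v); /* lanes 4..15 are guaranteed 0.0f */
}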
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double] whose lower 64 bits come from the second operand and whose upper 64 bits come from the first.
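A sketch of the lane selection (replace_low is an illustrative helper):

#include <immintrin.h>

__m128d replace_low(__m128d a, double x)
{
    /* result = { x, a[1] }: low element from the second operand,
       high element from the first. */
    return _mm_move_sd(a, _mm_set_sd(x));
}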
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kor(__mmask16 __A, __mmask16 __B)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
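The comparison intrinsics and macros yield __mmask values that compose directly with the k-register operations in this listing (_mm512_kor, _mm512_kandn, _mm512_kxor, ...). A sketch counting lanes equal to either of two keys:

#include <immintrin.h>

int count_matches(__m512i v, int key1, int key2)
{
    __mmask16 m1 = _mm512_cmpeq_epi32_mask(v, _mm512_set1_epi32(key1));
    __mmask16 m2 = _mm512_cmpeq_epi32_mask(v, _mm512_set1_epi32(key2));
    return __builtin_popcount((unsigned)_mm512_kor(m1, m2));
}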
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
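permutex2var draws each result lane from the 32-entry table formed by concatenating the two source vectors: index values 0..15 select from the first source, 16..31 from the second. A sketch that interleaves the low halves of two vectors:

#include <immintrin.h>

__m512 interleave_low(__m512 a, __m512 b)
{
    const __m512i idx = _mm512_set_epi32(23, 7, 22, 6, 21, 5, 20, 4,
                                         19, 3, 18, 2, 17, 1, 16, 0);
    /* Result: { a0, b0, a1, b1, ..., a7, b7 }. */
    return _mm512_permutex2var_ps(a, idx, b);
}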
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(float *__P, __m512 __A)
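_mm512_stream_ps is a non-temporal store that bypasses the cache hierarchy; the destination must be 64-byte aligned, and an _mm_sfence() is needed before the data is read by another agent. A sketch (fill_streaming is a hypothetical helper; p is assumed 64-byte aligned and n a multiple of 16):

#include <immintrin.h>
#include <stddef.h>

void fill_streaming(float *p, size_t n, float value)
{
    __m512 v = _mm512_set1_ps(value);
    for (size_t i = 0; i < n; i += 16)
        _mm512_stream_ps(p + i, v);
    _mm_sfence(); /* order the non-temporal stores */
}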
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands; the upper 96 bits of the result are copied from the first operand.
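Only lane 0 participates in the arithmetic; the upper lanes pass through, which is why scalar intrinsics chain naturally on one register. For example:

#include <immintrin.h>

float add_floats(float x, float y)
{
    /* Lane 0 holds x + y; lanes 1..3 are copied from the first operand. */
    return _mm_cvtss_f32(_mm_add_ss(_mm_set_ss(x), _mm_set_ss(y)));
}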
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
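The aligned loads (_mm512_load_ps, _mm512_load_pd) require a 64-byte-aligned address and fault otherwise; the loadu variants accept any address. A sketch (p_aligned64 is assumed to be 64-byte aligned):

#include <immintrin.h>

double sum_two_vectors(const double *p_any, const double *p_aligned64)
{
    __m512d a = _mm512_loadu_pd(p_any);      /* no alignment requirement */
    __m512d b = _mm512_load_pd(p_aligned64); /* 64-byte alignment required */
    return _mm512_reduce_add_pd(_mm512_add_pd(a, b));
}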
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b)