#ifndef __IMMINTRIN_H
#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLINTRIN_H
#define __AVX512VLINTRIN_H

#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
_mm_setzero_di(void) {
  return (__m128i)(__v2di){ 0LL, 0LL };
}
  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
  return (__mmask8)__builtin_ia32_pcmpeqd128_mask((__v4si)__a, (__v4si)__b,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 0,
  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
  return (__mmask8)__builtin_ia32_pcmpeqd256_mask((__v8si)__a, (__v8si)__b,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 0,
  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
  return (__mmask8)__builtin_ia32_pcmpeqq128_mask((__v2di)__a, (__v2di)__b,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 0,
  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
  return (__mmask8)__builtin_ia32_pcmpeqq256_mask((__v4di)__a, (__v4di)__b,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 0,
  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 5,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 5,
  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 5,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 5,
  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 5,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 5,
  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 5,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 5,
  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
  return (__mmask8)__builtin_ia32_pcmpgtd128_mask((__v4si)__a, (__v4si)__b,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 6,
  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
  return (__mmask8)__builtin_ia32_pcmpgtd256_mask((__v8si)__a, (__v8si)__b,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 6,
  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
  return (__mmask8)__builtin_ia32_pcmpgtq128_mask((__v2di)__a, (__v2di)__b,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 6,
  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
  return (__mmask8)__builtin_ia32_pcmpgtq256_mask((__v4di)__a, (__v4di)__b,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 6,
  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 2,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 2,
  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 2,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 2,
  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 2,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 2,
  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 2,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 2,
  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 1,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 1,
  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 1,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 1,
  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 1,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 1,
  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 1,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 1,
  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
  return (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)__a, (__v4si)__b, 4,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
  return (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)__a, (__v4si)__b, 4,
  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
  return (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)__a, (__v8si)__b, 4,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
  return (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)__a, (__v8si)__b, 4,
  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
  return (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)__a, (__v2di)__b, 4,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
  return (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)__a, (__v2di)__b, 4,
  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
  return (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)__a, (__v4di)__b, 4,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
  return (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)__a, (__v4di)__b, 4,
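/* Usage sketch (illustrative, not part of this header): the masked compare
   intrinsics built on the builtins above return an __mmask8 bitmap with one
   bit per element. Assumes AVX512F+AVX512VL and that <immintrin.h> has been
   included. */
#if 0
static __mmask8 demo_cmpeq(void) {
  __m128i a = _mm_set_epi32(3, 2, 1, 0);
  __m128i b = _mm_set_epi32(3, 9, 1, 9);
  /* Full compare: bit i is set where a[i] == b[i] (here 0b1010). */
  __mmask8 k = _mm_cmpeq_epi32_mask(a, b);
  /* A zero bit in the input mask forces the corresponding result bit to 0. */
  return _mm_mask_cmpeq_epi32_mask((__mmask8)0x3, a, b) & k;
}
#endif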
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
#define _mm_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_epi32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm256_cmp_epu32_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_epi64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1); })

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m)); })

#define _mm256_cmp_epu64_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_ps_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm256_cmp_pd_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm256_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_ps_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_ps_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)(m)); })

#define _mm_cmp_pd_mask(a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)-1); })

#define _mm_mask_cmp_pd_mask(m, a, b, p) __extension__ ({ \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)(m)); })
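/* Usage sketch (illustrative, not part of this header): the _cmp_ macros take
   an explicit predicate; the _MM_CMPINT_* constants from the AVX512F header
   name the encodings used by the fixed-predicate intrinsics above
   (0 = EQ, 1 = LT, 2 = LE, 4 = NE, 5 = NLT, 6 = NLE). */
#if 0
static __mmask8 demo_cmp_predicate(__m256i a, __m256i b) {
  /* Same operation as _mm256_cmple_epi32_mask(a, b). */
  return _mm256_cmp_epi32_mask(a, b, _MM_CMPINT_LE);
}
#endif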
  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddpd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddpd128_maskz ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddpd128_mask3 (-(__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddpd128_maskz (-(__v2df) __A,
  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddpd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddpd256_maskz ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddpd256_mask3 (-(__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddpd256_maskz (-(__v4df) __A,
  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddps128_mask3 ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddps128_maskz ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddps128_mask3 (-(__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddps128_maskz (-(__v4sf) __A,
  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddps256_mask3 ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddps256_maskz ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddps256_mask3 (-(__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddps256_maskz (-(__v8sf) __A,
  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask3 ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddsubpd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfmaddsubpd128_maskz ((__v2df) __A,
  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3 ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddsubpd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz ((__v4df) __A,
  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddsubps128_mask3 ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddsubps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfmaddsubps128_maskz ((__v4sf) __A,
  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddsubps256_mask3 ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddsubps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfmaddsubps256_maskz ((__v8sf) __A,
  return (__m128d) __builtin_ia32_vfmsubpd128_mask3 ((__v2df) __A,
  return (__m256d) __builtin_ia32_vfmsubpd256_mask3 ((__v4df) __A,
  return (__m128) __builtin_ia32_vfmsubps128_mask3 ((__v4sf) __A,
  return (__m256) __builtin_ia32_vfmsubps256_mask3 ((__v8sf) __A,
  return (__m128d) __builtin_ia32_vfmsubaddpd128_mask3 ((__v2df) __A,
  return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3 ((__v4df) __A,
  return (__m128) __builtin_ia32_vfmsubaddps128_mask3 ((__v4sf) __A,
  return (__m256) __builtin_ia32_vfmsubaddps256_mask3 ((__v8sf) __A,
  return (__m128d) __builtin_ia32_vfnmaddpd128_mask ((__v2df) __A,
  return (__m256d) __builtin_ia32_vfnmaddpd256_mask ((__v4df) __A,
  return (__m128) __builtin_ia32_vfnmaddps128_mask ((__v4sf) __A,
  return (__m256) __builtin_ia32_vfnmaddps256_mask ((__v8sf) __A,
  return (__m128d) __builtin_ia32_vfnmsubpd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_vfnmsubpd128_mask3 ((__v2df) __A,
  return (__m256d) __builtin_ia32_vfnmsubpd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_vfnmsubpd256_mask3 ((__v4df) __A,
  return (__m128) __builtin_ia32_vfnmsubps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_vfnmsubps128_mask3 ((__v4sf) __A,
  return (__m256) __builtin_ia32_vfnmsubps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_vfnmsubps256_mask3 ((__v8sf) __A,
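/* Usage sketch (illustrative, not part of this header): the three FMA masking
   forms differ in which operand passes through for masked-off lanes: _mask
   keeps __A, _mask3 keeps __C, and _maskz writes zero. */
#if 0
static __m128d demo_fmadd(__m128d a, __m128d b, __m128d c, __mmask8 k) {
  /* Lanes with k set get a*b + c; lanes with k clear keep a. */
  return _mm_mask_fmadd_pd(a, k, b, c);
}
#endif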
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
  return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
  return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
  return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
  return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
  return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
  return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
  return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
  return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
  return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
  __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
  __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
  __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
  __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
  __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
  __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
  __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
  __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
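/* Usage sketch (illustrative, not part of this header): compress packs the
   selected lanes toward the low end of the register; the store form writes
   only the selected lanes contiguously to memory, the usual left-packing /
   stream-compaction idiom. */
#if 0
static int demo_compress(float *dst, __m128 v, __mmask8 k) {
  _mm_mask_compressstoreu_ps(dst, k, v);   /* writes popcount(k) floats */
  return __builtin_popcount((unsigned)k & 0xF);
}
#endif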
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
  return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
  return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvtpd2dq256_mask ((__v4df) __A,
  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
  return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
  return (__m128) __builtin_ia32_cvtpd2ps256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvtps2dq128_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
  return (__m256i) __builtin_ia32_cvtps2dq256_mask ((__v8sf) __A,
  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
  return (__m128d) __builtin_ia32_cvtps2pd128_mask ((__v4sf) __A,
  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
  return (__m256d) __builtin_ia32_cvtps2pd256_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvttpd2dq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvttps2dq128_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
  return (__m256i) __builtin_ia32_cvttps2dq256_mask ((__v8sf) __A,
  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
  return (__m128d) __builtin_convertvector(
      __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
  return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  return (__m128) __builtin_ia32_cvtudq2ps128_mask ((__v4si) __A,
  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
  return (__m256) __builtin_ia32_cvtudq2ps256_mask ((__v8si) __A,
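/* Usage sketch (illustrative, not part of this header): each conversion has a
   plain, write-masked (merge into __W), and zero-masked form; cvtt* truncates,
   cvt* rounds under the current rounding mode, and the *udq forms are
   unsigned. */
#if 0
static __m128i demo_cvt(__m256d v, __m128i fallback, __mmask8 k) {
  /* Truncating double -> int32; masked-off lanes keep `fallback`. */
  return _mm256_mask_cvttpd_epi32(fallback, k, v);
}
#endif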
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
  return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
  return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
  return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
  return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
  return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
  return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
  return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
  return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
  return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
  return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
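/* Usage sketch (illustrative, not part of this header): expand is the inverse
   of compress, scattering consecutive source lanes into the positions selected
   by the mask; the loadu form reads only popcount(mask) elements from
   memory. */
#if 0
static __m128d demo_expand(const double *src, __mmask8 k) {
  /* Unselected lanes become zero in the maskz form. */
  return _mm_maskz_expandloadu_pd(k, src);
}
#endif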
  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
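/* Usage sketch (illustrative, not part of this header): getexp extracts
   floor(log2(|x|)) of each lane as a floating-point value. */
#if 0
static __m128d demo_getexp(void) {
  /* Element 0 becomes -1.0 (from 0.5), element 1 becomes 3.0 (from 8.0). */
  return _mm_getexp_pd(_mm_set_pd(8.0, 0.5));
}
#endif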
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U,
  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pabsq128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pabsq256_mask ((__v4di) __A,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pmaxsq128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pmaxsq256_mask ((__v4di) __A,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pmaxuq128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pmaxuq256_mask ((__v4di) __A,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pminsq128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pminsq256_mask ((__v4di) __A,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_pminuq128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_pminuq256_mask ((__v4di) __A,
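/* Usage sketch (illustrative, not part of this header): AVX512VL adds the
   64-bit element min/max that SSE/AVX lacked, in signed (epi64) and unsigned
   (epu64) flavors. */
#if 0
static __m128i demo_min64(__m128i a, __m128i b, __mmask8 k) {
  /* Zero-masked unsigned 64-bit minimum. */
  return _mm_maskz_min_epu64(k, a, b);
}
#endif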
#define _mm_roundscale_pd(A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                              (int)(imm), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1); })

#define _mm_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                              (int)(imm), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U)); })

#define _mm_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                              (int)(imm), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U)); })

#define _mm256_roundscale_pd(A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)-1); })

#define _mm256_mask_roundscale_pd(W, U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                              (int)(imm), \
                                              (__v4df)(__m256d)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_roundscale_pd(U, A, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)(U)); })

#define _mm_roundscale_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1); })

#define _mm_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U)); })

#define _mm_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U)); })

#define _mm256_roundscale_ps(A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)-1); })

#define _mm256_mask_roundscale_ps(W, U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__v8sf)(__m256)(W), \
                                             (__mmask8)(U)); })

#define _mm256_maskz_roundscale_ps(U, A, imm) __extension__ ({ \
  (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)(U)); })
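/* Usage sketch (illustrative, not part of this header): roundscale rounds each
   lane to 2^-M fractional bits; the upper nibble of imm selects M and the low
   bits select the rounding mode. */
#if 0
static __m128d demo_roundscale(__m128d v) {
  /* M = 0 with round-toward-negative-infinity: floor(v) per lane. */
  return _mm_roundscale_pd(v, 0x01);
}
#endif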
  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
  return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
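/* Usage sketch (illustrative, not part of this header): scalef computes
   x * 2^floor(y) per lane, the vector counterpart of scalbn()/ldexp(). */
#if 0
static __m128d demo_scalef(__m128d x) {
  return _mm_scalef_pd(x, _mm_set1_pd(3.0));   /* multiplies each lane by 8 */
}
#endif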
#define _mm_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale)); })

#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv2df((double *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale)); })

#define _mm_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale)); })

#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv2di((long long *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale)); })

#define _mm256_i64scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale)); })

#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4df((double *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale)); })

#define _mm256_i64scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale)); })

#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4di((long long *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale)); })

#define _mm_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4sf((float *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv4si((int *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm256_i64scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8sf((float *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm256_i64scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scatterdiv8si((int *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale)); })

#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2df((double *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale)); })

#define _mm_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale)); })

#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv2di((long long *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale)); })

#define _mm256_i32scatter_pd(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale)); })

#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4df((double *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale)); })

#define _mm256_i32scatter_epi64(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale)); })

#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4di((long long *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale)); })

#define _mm_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4sf((float *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale)); })

#define _mm_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv4si((int *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale)); })

#define _mm256_i32scatter_ps(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale)); })

#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8sf((float *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale)); })

#define _mm256_i32scatter_epi32(addr, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale)); })

#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) __extension__ ({ \
  __builtin_ia32_scattersiv8si((int *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale)); })
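/* Usage sketch (illustrative, not part of this header): scatters store lane i
   to addr + index[i] * scale; scale must be 1, 2, 4, or 8, and overlapping
   indices are written as if in element order. */
#if 0
static void demo_scatter(double *table, __m128i idx, __m128d v, __mmask8 k) {
  /* Writes v[i] to table[idx[i]] for each lane with k set. */
  _mm_mask_i64scatter_pd(table, k, idx, v, 8);
}
#endif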
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
  return (__m128i) __builtin_ia32_vpermi2vard128_mask ((__v4si) __A,
  return (__m256i) __builtin_ia32_vpermi2vard256_mask ((__v8si) __A,
  return (__m128d) __builtin_ia32_vpermi2varpd128_mask ((__v2df) __A,
  return (__m256d) __builtin_ia32_vpermi2varpd256_mask ((__v4df) __A,
  return (__m128) __builtin_ia32_vpermi2varps128_mask ((__v4sf) __A,
  return (__m256) __builtin_ia32_vpermi2varps256_mask ((__v8sf) __A,
  return (__m128i) __builtin_ia32_vpermi2varq128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_vpermi2varq256_mask ((__v4di) __A,
  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
  return (__m128i) __builtin_ia32_vpermt2vard128_mask ((__v4si) __I
  return (__m128i) __builtin_ia32_vpermt2vard128_maskz ((__v4si) __I
  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
  return (__m256i) __builtin_ia32_vpermt2vard256_mask ((__v8si) __I
                          __m256i __I, __m256i __B) {
  return (__m256i) __builtin_ia32_vpermt2vard256_maskz ((__v8si) __I
  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
  return (__m128d) __builtin_ia32_vpermt2varpd128_mask ((__v2di) __I
  return (__m128d) __builtin_ia32_vpermt2varpd128_maskz ((__v2di) __I
  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
  return (__m256d) __builtin_ia32_vpermt2varpd256_mask ((__v4di) __I
  return (__m256d) __builtin_ia32_vpermt2varpd256_maskz ((__v4di) __I
  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
  return (__m128) __builtin_ia32_vpermt2varps128_mask ((__v4si) __I
  return (__m128) __builtin_ia32_vpermt2varps128_maskz ((__v4si) __I
  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
  return (__m256) __builtin_ia32_vpermt2varps256_mask ((__v8si) __I
  return (__m256) __builtin_ia32_vpermt2varps256_maskz ((__v8si) __I
  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
  return (__m128i) __builtin_ia32_vpermt2varq128_mask ((__v2di) __I
  return (__m128i) __builtin_ia32_vpermt2varq128_maskz ((__v2di) __I
  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
  return (__m256i) __builtin_ia32_vpermt2varq256_mask ((__v4di) __I
                          __m256i __I, __m256i __B) {
  return (__m256i) __builtin_ia32_vpermt2varq256_maskz ((__v4di) __I
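/* Usage sketch (illustrative, not part of this header): permutex2var
   (vpermi2/vpermt2) indexes into the 2N-element concatenation of two vectors;
   the low index bits select the element and the next bit selects the source. */
#if 0
static __m128i demo_perm2(__m128i a, __m128i b) {
  /* Indices 0-3 pick from a, 4-7 from b: result is a[0], b[0], a[2], b[2]. */
  __m128i idx = _mm_set_epi32(6, 2, 4, 0);
  return _mm_permutex2var_epi32(a, idx, b);
}
#endif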
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
#define _mm_rol_epi32(a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
                                        (__v4si)_mm_setzero_si128(), \
                                        (__mmask8)-1); })

#define _mm_mask_rol_epi32(w, u, a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
                                        (__v4si)(__m128i)(w), (__mmask8)(u)); })

#define _mm_maskz_rol_epi32(u, a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prold128_mask((__v4si)(__m128i)(a), (int)(b), \
                                        (__v4si)_mm_setzero_si128(), \
                                        (__mmask8)(u)); })

#define _mm256_rol_epi32(a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
                                        (__v8si)_mm256_setzero_si256(), \
                                        (__mmask8)-1); })

#define _mm256_mask_rol_epi32(w, u, a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
                                        (__v8si)(__m256i)(w), (__mmask8)(u)); })

#define _mm256_maskz_rol_epi32(u, a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prold256_mask((__v8si)(__m256i)(a), (int)(b), \
                                        (__v8si)_mm256_setzero_si256(), \
                                        (__mmask8)(u)); })

#define _mm_rol_epi64(a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
                                        (__v2di)_mm_setzero_di(), \
                                        (__mmask8)-1); })

#define _mm_mask_rol_epi64(w, u, a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
                                        (__v2di)(__m128i)(w), (__mmask8)(u)); })

#define _mm_maskz_rol_epi64(u, a, b) __extension__ ({\
  (__m128i)__builtin_ia32_prolq128_mask((__v2di)(__m128i)(a), (int)(b), \
                                        (__v2di)_mm_setzero_di(), \
                                        (__mmask8)(u)); })

#define _mm256_rol_epi64(a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
                                        (__v4di)_mm256_setzero_si256(), \
                                        (__mmask8)-1); })

#define _mm256_mask_rol_epi64(w, u, a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
                                        (__v4di)(__m256i)(w), (__mmask8)(u)); })

#define _mm256_maskz_rol_epi64(u, a, b) __extension__ ({\
  (__m256i)__builtin_ia32_prolq256_mask((__v4di)(__m256i)(a), (int)(b), \
                                        (__v4di)_mm256_setzero_si256(), \
                                        (__mmask8)(u)); })
  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A,
  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A,
  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A,
  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
  return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A,
#define _mm_ror_epi32(A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                        (__v4si)_mm_setzero_si128(), \
                                        (__mmask8)-1); })

#define _mm_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                        (__v4si)(__m128i)(W), (__mmask8)(U)); })

#define _mm_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prord128_mask((__v4si)(__m128i)(A), (int)(B), \
                                        (__v4si)_mm_setzero_si128(), \
                                        (__mmask8)(U)); })

#define _mm256_ror_epi32(A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                        (__v8si)_mm256_setzero_si256(), \
                                        (__mmask8)-1); })

#define _mm256_mask_ror_epi32(W, U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                        (__v8si)(__m256i)(W), (__mmask8)(U)); })

#define _mm256_maskz_ror_epi32(U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prord256_mask((__v8si)(__m256i)(A), (int)(B), \
                                        (__v8si)_mm256_setzero_si256(), \
                                        (__mmask8)(U)); })

#define _mm_ror_epi64(A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                        (__v2di)_mm_setzero_di(), \
                                        (__mmask8)-1); })

#define _mm_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                        (__v2di)(__m128i)(W), (__mmask8)(U)); })

#define _mm_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m128i)__builtin_ia32_prorq128_mask((__v2di)(__m128i)(A), (int)(B), \
                                        (__v2di)_mm_setzero_di(), \
                                        (__mmask8)(U)); })

#define _mm256_ror_epi64(A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                        (__v4di)_mm256_setzero_si256(), \
                                        (__mmask8)-1); })

#define _mm256_mask_ror_epi64(W, U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                        (__v4di)(__m256i)(W), (__mmask8)(U)); })

#define _mm256_maskz_ror_epi64(U, A, B) __extension__ ({ \
  (__m256i)__builtin_ia32_prorq256_mask((__v4di)(__m256i)(A), (int)(B), \
                                        (__v4di)_mm256_setzero_si256(), \
                                        (__mmask8)(U)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sll_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sll_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sll_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sll_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_slli_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_slli_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_slli_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_slli_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sll_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sll_epi64(__A, __B), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sll_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sll_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_slli_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_slli_epi64(__A, __B), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_slli_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_slli_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rorv_epi32(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvd128_mask((__v4si)__A, (__v4si)__B, (__v4si)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvd128_mask((__v4si)__A, (__v4si)__B, (__v4si)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvd128_mask((__v4si)__A, (__v4si)__B, (__v4si)_mm_setzero_si128(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rorv_epi32(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvd256_mask((__v8si)__A, (__v8si)__B, (__v8si)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvd256_mask((__v8si)__A, (__v8si)__B, (__v8si)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvd256_mask((__v8si)__A, (__v8si)__B, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rorv_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvq128_mask((__v2di)__A, (__v2di)__B, (__v2di)_mm_setzero_di(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvq128_mask((__v2di)__A, (__v2di)__B, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvq128_mask((__v2di)__A, (__v2di)__B, (__v2di)_mm_setzero_di(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rorv_epi64(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvq256_mask((__v4di)__A, (__v4di)__B, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvq256_mask((__v4di)__A, (__v4di)__B, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvq256_mask((__v4di)__A, (__v4di)__B, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sllv_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sllv_epi64(__A, __B), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sllv_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sllv_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sllv_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sllv_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sllv_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sllv_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srlv_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srlv_epi64(__A, __B), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srlv_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srlv_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srlv_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srlv_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srlv_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srlv_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srl_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srl_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srl_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srl_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srli_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srli_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srli_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srli_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srl_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srl_epi64(__A, __B), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srl_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srl_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srli_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srli_epi64(__A, __B), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srli_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srli_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srav_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srav_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srav_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srav_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srav_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srav_epi64(__X, __Y), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srav_epi64(__X, __Y), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srav_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di)__Y); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srav_epi64(__X, __Y), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srav_epi64(__X, __Y), (__v4di)_mm256_setzero_si256()); }
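/* Usage sketch (illustrative only): per-lane variable shift with zeroing
 * masking. Lanes 0 and 2 (mask 0x5) are shifted by their own counts; the
 * other lanes come out zero. Helper name and constants are examples. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
__example_masked_srlv(__m128i __v)
{
  return _mm_maskz_srlv_epi32(0x5, __v, _mm_set_epi32(0, 1, 2, 3));
}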
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)__A, (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)__A, (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)__A, (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)__A, (__v8si)_mm256_setzero_si256()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P) { return (__m128i)__builtin_ia32_movdqa32load128_mask((__v4si *)__P, (__v4si)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_load_epi32(__mmask8 __U, void const *__P) { return (__m128i)__builtin_ia32_movdqa32load128_mask((__v4si *)__P, (__v4si)_mm_setzero_si128(), (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P) { return (__m256i)__builtin_ia32_movdqa32load256_mask((__v8si *)__P, (__v8si)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_load_epi32(__mmask8 __U, void const *__P) { return (__m256i)__builtin_ia32_movdqa32load256_mask((__v8si *)__P, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa32store128_mask((__v4si *)__P, (__v4si)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa32store256_mask((__v8si *)__P, (__v8si)__A, (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)__A, (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)__A, (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)__A, (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)__A, (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P) { return (__m128i)__builtin_ia32_movdqa64load128_mask((__v2di *)__P, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_load_epi64(__mmask8 __U, void const *__P) { return (__m128i)__builtin_ia32_movdqa64load128_mask((__v2di *)__P, (__v2di)_mm_setzero_di(), (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P) { return (__m256i)__builtin_ia32_movdqa64load256_mask((__v4di *)__P, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_load_epi64(__mmask8 __U, void const *__P) { return (__m256i)__builtin_ia32_movdqa64load256_mask((__v4di *)__P, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa64store128_mask((__v2di *)__P, (__v2di)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa64store256_mask((__v4di *)__P, (__v4di)__A, (__mmask8)__U); }
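/* Usage sketch (illustrative only): copy just the low 64-bit lane between
 * two buffers assumed 16-byte aligned; the masked store leaves the
 * destination's high lane untouched. */
static __inline__ void __DEFAULT_FN_ATTRS
__example_masked_copy_epi64(void *__dst, const void *__src)
{
  __m128i __t = _mm_maskz_load_epi64(0x1, __src);   /* high lane zeroed */
  _mm_mask_store_epi64(__dst, 0x1, __t);            /* high lane untouched */
}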
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_movedup_pd(__A), (__v2df)__W); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_movedup_pd(__A), (__v2df)_mm_setzero_pd()); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_movedup_pd(__A), (__v4df)__W); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_movedup_pd(__A), (__v4df)_mm256_setzero_pd()); }
#define _mm_mask_set1_epi32(O, M, A) __extension__ ({ \
  (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
                                                  (__v4si)(__m128i)(O), \
                                                  (__mmask8)(M)); })

#define _mm_maskz_set1_epi32(M, A) __extension__ ({ \
  (__m128i)__builtin_ia32_pbroadcastd128_gpr_mask((int)(A), \
                                                  (__v4si)_mm_setzero_si128(), \
                                                  (__mmask8)(M)); })

#define _mm256_mask_set1_epi32(O, M, A) __extension__ ({ \
  (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
                                                  (__v8si)(__m256i)(O), \
                                                  (__mmask8)(M)); })

#define _mm256_maskz_set1_epi32(M, A) __extension__ ({ \
  (__m256i)__builtin_ia32_pbroadcastd256_gpr_mask((int)(A), \
                                                  (__v8si)_mm256_setzero_si256(), \
                                                  (__mmask8)(M)); })
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { return (__m128i)__builtin_ia32_pbroadcastq128_gpr_mask(__A, (__v2di)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m128i)__builtin_ia32_pbroadcastq128_gpr_mask(__A, (__v2di)_mm_setzero_si128(), __M); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { return (__m256i)__builtin_ia32_pbroadcastq256_gpr_mask(__A, (__v4di)__O, __M); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m256i)__builtin_ia32_pbroadcastq256_gpr_mask(__A, (__v4di)_mm256_setzero_si256(), __M); }
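/* Usage sketch (illustrative only): masked scalar broadcast. Only lanes
 * selected by __m receive __x; the rest keep __old. */
static __inline__ __m256i __DEFAULT_FN_ATTRS
__example_masked_splat(__m256i __old, __mmask8 __m, int __x)
{
  return _mm256_mask_set1_epi32(__old, __m, __x);
}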
#define _mm_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (int)(imm), (__mmask8)(U)); })

#define _mm256_fixupimm_pd(A, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                             (__v4df)(__m256d)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1); })

#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                             (__v4df)(__m256d)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (int)(imm), (__mmask8)(U)); })

#define _mm_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_fixupimm_ps(A, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                            (__v8sf)(__m256)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                            (__v8sf)(__m256)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) __extension__ ({ \
  (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P) { return (__m128d)__builtin_ia32_loadapd128_mask((__v2df *)__P, (__v2df)__W, (__mmask8)__U); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_load_pd(__mmask8 __U, void const *__P) { return (__m128d)__builtin_ia32_loadapd128_mask((__v2df *)__P, (__v2df)_mm_setzero_pd(), (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P) { return (__m256d)__builtin_ia32_loadapd256_mask((__v4df *)__P, (__v4df)__W, (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_load_pd(__mmask8 __U, void const *__P) { return (__m256d)__builtin_ia32_loadapd256_mask((__v4df *)__P, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P) { return (__m128)__builtin_ia32_loadaps128_mask((__v4sf *)__P, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_load_ps(__mmask8 __U, void const *__P) { return (__m128)__builtin_ia32_loadaps128_mask((__v4sf *)__P, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P) { return (__m256)__builtin_ia32_loadaps256_mask((__v8sf *)__P, (__v8sf)__W, (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_load_ps(__mmask8 __U, void const *__P) { return (__m256)__builtin_ia32_loadaps256_mask((__v8sf *)__P, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P) { return (__m128i)__builtin_ia32_loaddqudi128_mask((__v2di *)__P, (__v2di)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P) { return (__m128i)__builtin_ia32_loaddqudi128_mask((__v2di *)__P, (__v2di)_mm_setzero_di(), (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P) { return (__m256i)__builtin_ia32_loaddqudi256_mask((__v4di *)__P, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P) { return (__m256i)__builtin_ia32_loaddqudi256_mask((__v4di *)__P, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P) { return (__m128i)__builtin_ia32_loaddqusi128_mask((__v4si *)__P, (__v4si)__W, (__mmask8)__U); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P) { return (__m128i)__builtin_ia32_loaddqusi128_mask((__v4si *)__P, (__v4si)_mm_setzero_si128(), (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P) { return (__m256i)__builtin_ia32_loaddqusi256_mask((__v8si *)__P, (__v8si)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P) { return (__m256i)__builtin_ia32_loaddqusi256_mask((__v8si *)__P, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P) { return (__m128d)__builtin_ia32_loadupd128_mask((__v2df *)__P, (__v2df)__W, (__mmask8)__U); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_loadu_pd(__mmask8 __U, void const *__P) { return (__m128d)__builtin_ia32_loadupd128_mask((__v2df *)__P, (__v2df)_mm_setzero_pd(), (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P) { return (__m256d)__builtin_ia32_loadupd256_mask((__v4df *)__P, (__v4df)__W, (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P) { return (__m256d)__builtin_ia32_loadupd256_mask((__v4df *)__P, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P) { return (__m128)__builtin_ia32_loadups128_mask((__v4sf *)__P, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_loadu_ps(__mmask8 __U, void const *__P) { return (__m128)__builtin_ia32_loadups128_mask((__v4sf *)__P, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P) { return (__m256)__builtin_ia32_loadups256_mask((__v8sf *)__P, (__v8sf)__W, (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P) { return (__m256)__builtin_ia32_loadups256_mask((__v8sf *)__P, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); }

static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeapd128_mask((__v2df *)__P, (__v2df)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeapd256_mask((__v4df *)__P, (__v4df)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeaps128_mask((__v4sf *)__P, (__v4sf)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeaps256_mask((__v8sf *)__P, (__v8sf)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqudi128_mask((__v2di *)__P, (__v2di)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqudi256_mask((__v4di *)__P, (__v4di)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqusi128_mask((__v4si *)__P, (__v4si)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqusi256_mask((__v8si *)__P, (__v8si)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeupd128_mask((__v2df *)__P, (__v2df)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeupd256_mask((__v4df *)__P, (__v4df)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeups128_mask((__v4sf *)__P, (__v4sf)__A, (__mmask8)__U); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeups256_mask((__v8sf *)__P, (__v8sf)__A, (__mmask8)__U); }
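/* Usage sketch (illustrative only): a masked unaligned load is the usual
 * way to handle a tail of fewer than four doubles without reading past the
 * end of an array. __n is assumed to be in [0,4]; (1 << __n) - 1 selects
 * the first __n lanes and zeroes the rest. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
__example_load_tail_pd(const double *__p, int __n)
{
  return _mm256_maskz_loadu_pd((__mmask8)((1 << __n) - 1), __p);
}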
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)__W); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)_mm_setzero_pd()); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)__W); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)__W); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)__W); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)__W); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)_mm_setzero_pd()); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)__W); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)__W); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)_mm_setzero_ps()); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)__W); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_pd(__m128d __A) { return (__m128d)__builtin_ia32_rcp14pd128_mask((__v2df)__A, (__v2df)_mm_setzero_pd(), (__mmask8)-1); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_rcp14pd128_mask((__v2df)__A, (__v2df)__W, (__mmask8)__U); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_rcp14pd128_mask((__v2df)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_rcp14_pd(__m256d __A) { return (__m256d)__builtin_ia32_rcp14pd256_mask((__v4df)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)-1); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_rcp14pd256_mask((__v4df)__A, (__v4df)__W, (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_rcp14pd256_mask((__v4df)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp14_ps(__m128 __A) { return (__m128)__builtin_ia32_rcp14ps128_mask((__v4sf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)-1); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_rcp14ps128_mask((__v4sf)__A, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_rcp14ps128_mask((__v4sf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_rcp14_ps(__m256 __A) { return (__m256)__builtin_ia32_rcp14ps256_mask((__v8sf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_rcp14ps256_mask((__v8sf)__A, (__v8sf)__W, (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_rcp14ps256_mask((__v8sf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); }
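/* Usage sketch (illustrative only): rcp14 gives roughly 14 correct bits;
 * one Newton-Raphson step, r' = r * (2 - x * r), roughly doubles that. */
static __inline__ __m128 __DEFAULT_FN_ATTRS
__example_refined_rcp(__m128 __x)
{
  __m128 __r = _mm_rcp14_ps(__x);
  return _mm_mul_ps(__r, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(__x, __r)));
}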
#define _mm_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_permute_pd((X), (C)), \
                                       (__v2df)(__m128d)(W)); })

#define _mm_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_permute_pd((X), (C)), \
                                       (__v2df)_mm_setzero_pd()); })

#define _mm256_mask_permute_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permute_pd((X), (C)), \
                                       (__v4df)(__m256d)(W)); })

#define _mm256_maskz_permute_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permute_pd((X), (C)), \
                                       (__v4df)_mm256_setzero_pd()); })

#define _mm_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_permute_ps((X), (C)), \
                                      (__v4sf)(__m128)(W)); })

#define _mm_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_permute_ps((X), (C)), \
                                      (__v4sf)_mm_setzero_ps()); })

#define _mm256_mask_permute_ps(W, U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_permute_ps((X), (C)), \
                                      (__v8sf)(__m256)(W)); })

#define _mm256_maskz_permute_ps(U, X, C) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_permute_ps((X), (C)), \
                                      (__v8sf)_mm256_setzero_ps()); })
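/* Usage sketch (illustrative only): masked in-lane permute. Control 0x5
 * swaps the two doubles inside each 128-bit half; unselected lanes keep
 * __old. */
static __inline__ __m256d __DEFAULT_FN_ATTRS
__example_masked_swap_pd(__m256d __old, __mmask8 __m, __m256d __v)
{
  return _mm256_mask_permute_pd(__old, __m, __v, 0x5);
}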
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)__W); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_permutevar_pd(__A, __C), (__v2df)_mm_setzero_pd()); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)__W); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_permutevar_pd(__A, __C), (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)__W); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_permutevar_ps(__A, __C), (__v4sf)_mm_setzero_ps()); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)__W); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_permutevar_ps(__A, __C), (__v8sf)_mm256_setzero_ps()); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi32_mask(__m128i __A, __m128i __B) { return (__mmask8)__builtin_ia32_ptestmd128((__v4si)__A, (__v4si)__B, (__mmask8)-1); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B) { return (__mmask8)__builtin_ia32_ptestmd128((__v4si)__A, (__v4si)__B, __U); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_test_epi32_mask(__m256i __A, __m256i __B) { return (__mmask8)__builtin_ia32_ptestmd256((__v8si)__A, (__v8si)__B, (__mmask8)-1); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B) { return (__mmask8)__builtin_ia32_ptestmd256((__v8si)__A, (__v8si)__B, __U); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi64_mask(__m128i __A, __m128i __B) { return (__mmask8)__builtin_ia32_ptestmq128((__v2di)__A, (__v2di)__B, (__mmask8)-1); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B) { return (__mmask8)__builtin_ia32_ptestmq128((__v2di)__A, (__v2di)__B, __U); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_test_epi64_mask(__m256i __A, __m256i __B) { return (__mmask8)__builtin_ia32_ptestmq256((__v4di)__A, (__v4di)__B, (__mmask8)-1); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B) { return (__mmask8)__builtin_ia32_ptestmq256((__v4di)__A, (__v4di)__B, __U); }

static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi32_mask(__m128i __A, __m128i __B) { return (__mmask8)__builtin_ia32_ptestnmd128((__v4si)__A, (__v4si)__B, (__mmask8)-1); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B) { return (__mmask8)__builtin_ia32_ptestnmd128((__v4si)__A, (__v4si)__B, __U); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_testn_epi32_mask(__m256i __A, __m256i __B) { return (__mmask8)__builtin_ia32_ptestnmd256((__v8si)__A, (__v8si)__B, (__mmask8)-1); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B) { return (__mmask8)__builtin_ia32_ptestnmd256((__v8si)__A, (__v8si)__B, __U); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi64_mask(__m128i __A, __m128i __B) { return (__mmask8)__builtin_ia32_ptestnmq128((__v2di)__A, (__v2di)__B, (__mmask8)-1); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B) { return (__mmask8)__builtin_ia32_ptestnmq128((__v2di)__A, (__v2di)__B, __U); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_testn_epi64_mask(__m256i __A, __m256i __B) { return (__mmask8)__builtin_ia32_ptestnmq256((__v4di)__A, (__v4di)__B, (__mmask8)-1); }
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B) { return (__mmask8)__builtin_ia32_ptestnmq256((__v4di)__A, (__v4di)__B, __U); }
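/* Usage sketch (illustrative only): testing a vector against itself yields
 * a mask of its non-zero lanes, a common way to build a write-mask for
 * later masked operations. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS
__example_nonzero_lanes(__m128i __v)
{
  return _mm_test_epi32_mask(__v, __v);
}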
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)_mm_setzero_di()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sra_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_sra_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sra_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_sra_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srai_epi32(__A, __B), (__v4si)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_srai_epi32(__A, __B), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srai_epi32(__A, __B), (__v8si)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_srai_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sra_epi64(__A, __B), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_sra_epi64(__A, __B), (__v2di)_mm_setzero_di()); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psraq256((__v4di)__A, (__v2di)__B); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sra_epi64(__A, __B), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_sra_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi64(__m128i __A, int __imm) { return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srai_epi64(__A, __imm), (__v2di)__W); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_srai_epi64(__A, __imm), (__v2di)_mm_setzero_di()); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srai_epi64(__m256i __A, int __imm) { return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srai_epi64(__A, __imm), (__v4di)__W); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_srai_epi64(__A, __imm), (__v4di)_mm256_setzero_si256()); }
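/* Usage sketch (illustrative only): AVX-512VL adds a true 64-bit arithmetic
 * right shift, which SSE2 and AVX2 lack. Shifting by 63 broadcasts each
 * lane's sign bit. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
__example_sign_mask_epi64(__m128i __v)
{
  return _mm_srai_epi64(__v, 63); /* all-ones for negative lanes, else zero */
}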
#define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                            (__v4si)(__m128i)(B), \
                                            (__v4si)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                            (__v8si)(__m256i)(B), \
                                            (__v8si)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                            (__v2di)(__m128i)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)); })

#define _mm256_ternarylogic_epi64(A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (__v4di)(__m256i)(C), (int)(imm), \
                                            (__mmask8)-1); })

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                            (__v4di)(__m256i)(B), \
                                            (__v4di)(__m256i)(C), (int)(imm), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)); })
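/* Usage sketch (illustrative only): the ternarylogic immediate is the
 * 8-entry truth table of a three-input boolean function; 0x96 is the table
 * of a three-way XOR, A ^ B ^ C. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
__example_xor3(__m128i __a, __m128i __b, __m128i __c)
{
  return _mm_ternarylogic_epi32(__a, __b, __c, 0x96);
}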
#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)-1); })

#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)(__m256)(W), \
                                             (__mmask8)(U)); })

#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), (int)(imm), \
                                             (__v8sf)_mm256_setzero_ps(), \
                                             (__mmask8)(U)); })

#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)-1); })

#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)(__m256d)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
  (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (int)(imm), \
                                              (__v4df)_mm256_setzero_pd(), \
                                              (__mmask8)(U)); })

#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)-1); })

#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U)); })

#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)_mm256_setzero_si256(), \
                                              (__mmask8)-1); })

#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)(__m256i)(W), \
                                              (__mmask8)(U)); })

#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (int)(imm), \
                                              (__v4di)_mm256_setzero_si256(), \
                                              (__mmask8)(U)); })
#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                       (__v2df)(__m128d)(W)); })

#define _mm_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                       (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                       (__v2df)_mm_setzero_pd()); })

#define _mm256_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                       (__v4df)(__m256d)(W)); })

#define _mm256_maskz_shuffle_pd(U, A, B, M) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                       (__v4df)_mm256_setzero_pd()); })

#define _mm_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                      (__v4sf)(__m128)(W)); })

#define _mm_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                      (__v4sf)_mm_setzero_ps()); })

#define _mm256_mask_shuffle_ps(W, U, A, B, M) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                      (__v8sf)(__m256)(W)); })

#define _mm256_maskz_shuffle_ps(U, A, B, M) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                      (__v8sf)_mm256_setzero_ps()); })
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_pd(__m128d __A) { return (__m128d)__builtin_ia32_rsqrt14pd128_mask((__v2df)__A, (__v2df)_mm_setzero_pd(), (__mmask8)-1); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_rsqrt14pd128_mask((__v2df)__A, (__v2df)__W, (__mmask8)__U); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_rsqrt14pd128_mask((__v2df)__A, (__v2df)_mm_setzero_pd(), (__mmask8)__U); }

static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_rsqrt14_pd(__m256d __A) { return (__m256d)__builtin_ia32_rsqrt14pd256_mask((__v4df)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)-1); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_rsqrt14pd256_mask((__v4df)__A, (__v4df)__W, (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_rsqrt14pd256_mask((__v4df)__A, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt14_ps(__m128 __A) { return (__m128)__builtin_ia32_rsqrt14ps128_mask((__v4sf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)-1); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_rsqrt14ps128_mask((__v4sf)__A, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_rsqrt14ps128_mask((__v4sf)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt14_ps(__m256 __A) { return (__m256)__builtin_ia32_rsqrt14ps256_mask((__v8sf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_rsqrt14ps256_mask((__v8sf)__A, (__v8sf)__W, (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_rsqrt14ps256_mask((__v8sf)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_f32x4(__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 2, 3, 0, 1, 2, 3); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)__O); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcast_i32x4(__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 2, 3, 0, 1, 2, 3); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)__O); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)_mm256_setzero_si256()); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df)_mm256_broadcastsd_pd(__A), (__v4df)__O); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df)_mm256_broadcastsd_pd(__A), (__v4df)_mm256_setzero_pd()); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf)_mm_broadcastss_ps(__A), (__v4sf)__O); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf)_mm_broadcastss_ps(__A), (__v4sf)_mm_setzero_ps()); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf)_mm256_broadcastss_ps(__A), (__v8sf)__O); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf)_mm256_broadcastss_ps(__A), (__v8sf)_mm256_setzero_ps()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_broadcastd_epi32(__A), (__v4si)__O); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_broadcastd_epi32(__A), (__v4si)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si)_mm256_broadcastd_epi32(__A), (__v8si)__O); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si)_mm256_broadcastd_epi32(__A), (__v8si)_mm256_setzero_si256()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di)_mm_broadcastq_epi64(__A), (__v2di)__O); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di)_mm_broadcastq_epi64(__A), (__v2di)_mm_setzero_si128()); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di)_mm256_broadcastq_epi64(__A), (__v4di)__O); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di)_mm256_broadcastq_epi64(__A), (__v4di)_mm256_setzero_si256()); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi32_epi8(__m128i __A) { return (__m128i)__builtin_ia32_pmovsdb128_mask((__v4si)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsdb128_mask((__v4si)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsdb128_mask((__v4si)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdb128mem_mask((__v16qi *)__P, (__v4si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi32_epi8(__m256i __A) { return (__m128i)__builtin_ia32_pmovsdb256_mask((__v8si)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsdb256_mask((__v8si)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsdb256_mask((__v8si)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdb256mem_mask((__v16qi *)__P, (__v8si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi32_epi16(__m128i __A) { return (__m128i)__builtin_ia32_pmovsdw128_mask((__v4si)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsdw128_mask((__v4si)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsdw128_mask((__v4si)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdw128mem_mask((__v8hi *)__P, (__v4si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi32_epi16(__m256i __A) { return (__m128i)__builtin_ia32_pmovsdw256_mask((__v8si)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsdw256_mask((__v8si)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsdw256_mask((__v8si)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdw256mem_mask((__v8hi *)__P, (__v8si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi8(__m128i __A) { return (__m128i)__builtin_ia32_pmovsqb128_mask((__v2di)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsqb128_mask((__v2di)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsqb128_mask((__v2di)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqb128mem_mask((__v16qi *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi8(__m256i __A) { return (__m128i)__builtin_ia32_pmovsqb256_mask((__v4di)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsqb256_mask((__v4di)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsqb256_mask((__v4di)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqb256mem_mask((__v16qi *)__P, (__v4di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi32(__m128i __A) { return (__m128i)__builtin_ia32_pmovsqd128_mask((__v2di)__A, (__v4si)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsqd128_mask((__v2di)__A, (__v4si)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsqd128_mask((__v2di)__A, (__v4si)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqd128mem_mask((__v4si *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi32(__m256i __A) { return (__m128i)__builtin_ia32_pmovsqd256_mask((__v4di)__A, (__v4si)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsqd256_mask((__v4di)__A, (__v4si)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsqd256_mask((__v4di)__A, (__v4si)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqd256mem_mask((__v4si *)__P, (__v4di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi16(__m128i __A) { return (__m128i)__builtin_ia32_pmovsqw128_mask((__v2di)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsqw128_mask((__v2di)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovsqw128_mask((__v2di)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqw128mem_mask((__v8hi *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi16(__m256i __A) { return (__m128i)__builtin_ia32_pmovsqw256_mask((__v4di)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsqw256_mask((__v4di)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovsqw256_mask((__v4di)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqw256mem_mask((__v8hi *)__P, (__v4di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi32_epi8(__m128i __A) { return (__m128i)__builtin_ia32_pmovusdb128_mask((__v4si)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusdb128_mask((__v4si)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusdb128_mask((__v4si)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdb128mem_mask((__v16qi *)__P, (__v4si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi32_epi8(__m256i __A) { return (__m128i)__builtin_ia32_pmovusdb256_mask((__v8si)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusdb256_mask((__v8si)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusdb256_mask((__v8si)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdb256mem_mask((__v16qi *)__P, (__v8si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi32_epi16(__m128i __A) { return (__m128i)__builtin_ia32_pmovusdw128_mask((__v4si)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusdw128_mask((__v4si)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusdw128_mask((__v4si)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdw128mem_mask((__v8hi *)__P, (__v4si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi32_epi16(__m256i __A) { return (__m128i)__builtin_ia32_pmovusdw256_mask((__v8si)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusdw256_mask((__v8si)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusdw256_mask((__v8si)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdw256mem_mask((__v8hi *)__P, (__v8si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi8(__m128i __A) { return (__m128i)__builtin_ia32_pmovusqb128_mask((__v2di)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusqb128_mask((__v2di)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusqb128_mask((__v2di)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqb128mem_mask((__v16qi *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi8(__m256i __A) { return (__m128i)__builtin_ia32_pmovusqb256_mask((__v4di)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusqb256_mask((__v4di)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusqb256_mask((__v4di)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqb256mem_mask((__v16qi *)__P, (__v4di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi32(__m128i __A) { return (__m128i)__builtin_ia32_pmovusqd128_mask((__v2di)__A, (__v4si)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusqd128_mask((__v2di)__A, (__v4si)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusqd128_mask((__v2di)__A, (__v4si)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqd128mem_mask((__v4si *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi32(__m256i __A) { return (__m128i)__builtin_ia32_pmovusqd256_mask((__v4di)__A, (__v4si)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusqd256_mask((__v4di)__A, (__v4si)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusqd256_mask((__v4di)__A, (__v4si)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqd256mem_mask((__v4si *)__P, (__v4di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi16(__m128i __A) { return (__m128i)__builtin_ia32_pmovusqw128_mask((__v2di)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusqw128_mask((__v2di)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovusqw128_mask((__v2di)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqw128mem_mask((__v8hi *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi16(__m256i __A) { return (__m128i)__builtin_ia32_pmovusqw256_mask((__v4di)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusqw256_mask((__v4di)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovusqw256_mask((__v4di)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqw256mem_mask((__v8hi *)__P, (__v4di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi8(__m128i __A) { return (__m128i)__builtin_ia32_pmovdb128_mask((__v4si)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovdb128_mask((__v4si)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovdb128_mask((__v4si)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdb128mem_mask((__v16qi *)__P, (__v4si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi8(__m256i __A) { return (__m128i)__builtin_ia32_pmovdb256_mask((__v8si)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovdb256_mask((__v8si)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovdb256_mask((__v8si)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdb256mem_mask((__v16qi *)__P, (__v8si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi16(__m128i __A) { return (__m128i)__builtin_ia32_pmovdw128_mask((__v4si)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovdw128_mask((__v4si)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovdw128_mask((__v4si)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdw128mem_mask((__v8hi *)__P, (__v4si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi16(__m256i __A) { return (__m128i)__builtin_ia32_pmovdw256_mask((__v8si)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovdw256_mask((__v8si)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovdw256_mask((__v8si)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdw256mem_mask((__v8hi *)__P, (__v8si)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi8(__m128i __A) { return (__m128i)__builtin_ia32_pmovqb128_mask((__v2di)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovqb128_mask((__v2di)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovqb128_mask((__v2di)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqb128mem_mask((__v16qi *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi8(__m256i __A) { return (__m128i)__builtin_ia32_pmovqb256_mask((__v4di)__A, (__v16qi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovqb256_mask((__v4di)__A, (__v16qi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovqb256_mask((__v4di)__A, (__v16qi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqb256mem_mask((__v16qi *)__P, (__v4di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi32(__m128i __A) { return (__m128i)__builtin_ia32_pmovqd128_mask((__v2di)__A, (__v4si)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovqd128_mask((__v2di)__A, (__v4si)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovqd128_mask((__v2di)__A, (__v4si)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqd128mem_mask((__v4si *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi32(__m256i __A) { return (__m128i)__builtin_ia32_pmovqd256_mask((__v4di)__A, (__v4si)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovqd256_mask((__v4di)__A, (__v4si)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovqd256_mask((__v4di)__A, (__v4si)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqd256mem_mask((__v4si *)__P, (__v4di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi16(__m128i __A) { return (__m128i)__builtin_ia32_pmovqw128_mask((__v2di)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovqw128_mask((__v2di)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_pmovqw128_mask((__v2di)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqw128mem_mask((__v8hi *)__P, (__v2di)__A, __M); }

static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi16(__m256i __A) { return (__m128i)__builtin_ia32_pmovqw256_mask((__v4di)__A, (__v8hi)_mm_setzero_si128(), (__mmask8)-1); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovqw256_mask((__v4di)__A, (__v8hi)__O, __M); }
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_pmovqw256_mask((__v4di)__A, (__v8hi)_mm_setzero_si128(), __M); }
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqw256mem_mask((__v8hi *)__P, (__v4di)__A, __M); }
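/* Usage sketch (illustrative only): the cvt*_storeu intrinsics narrow and
 * store in one step. Here the four signed 32-bit lanes are saturated to
 * 8 bits and only the first two result bytes (mask 0x3) are written. */
static __inline__ void __DEFAULT_FN_ATTRS
__example_narrow_store(void *__p, __m128i __v)
{
  _mm_mask_cvtsepi32_storeu_epi8(__p, 0x3, __v);
}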
#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                  (__v8sf)_mm256_undefined_ps(), \
                                  ((imm) & 1) ? 4 : 0, \
                                  ((imm) & 1) ? 5 : 1, \
                                  ((imm) & 1) ? 6 : 2, \
                                  ((imm) & 1) ? 7 : 3); })

#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
                                      (__v4sf)(__m128)(W)); })

#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
  (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                      (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
                                      (__v4sf)_mm_setzero_ps()); })

#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8si)(__m256i)(A), \
                                   (__v8si)_mm256_undefined_si256(), \
                                   ((imm) & 1) ? 4 : 0, \
                                   ((imm) & 1) ? 5 : 1, \
                                   ((imm) & 1) ? 6 : 2, \
                                   ((imm) & 1) ? 7 : 3); })

#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
                                      (__v4si)(__m128i)(W)); })

#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
                                      (__v4si)_mm_setzero_si128()); })

#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
  (__m256)__builtin_shufflevector((__v8sf)(A), \
                                  (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
                                  ((imm) & 0x1) ? 0 : 8, \
                                  ((imm) & 0x1) ? 1 : 9, \
                                  ((imm) & 0x1) ? 2 : 10, \
                                  ((imm) & 0x1) ? 3 : 11, \
                                  ((imm) & 0x1) ? 8 : 4, \
                                  ((imm) & 0x1) ? 9 : 5, \
                                  ((imm) & 0x1) ? 10 : 6, \
                                  ((imm) & 0x1) ? 11 : 7); })

#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                      (__v8sf)(__m256)(W)); })

#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
  (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                      (__v8sf)_mm256_setzero_ps()); })

#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v8si)(A), \
                                   (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
                                   ((imm) & 0x1) ? 0 : 8, \
                                   ((imm) & 0x1) ? 1 : 9, \
                                   ((imm) & 0x1) ? 2 : 10, \
                                   ((imm) & 0x1) ? 3 : 11, \
                                   ((imm) & 0x1) ? 8 : 4, \
                                   ((imm) & 0x1) ? 9 : 5, \
                                   ((imm) & 0x1) ? 10 : 6, \
                                   ((imm) & 0x1) ? 11 : 7); })

#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                      (__v8si)(__m256i)(W)); })

#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                      (__v8si)_mm256_setzero_si256()); })
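/* Usage sketch (illustrative only): extract and insert move whole 128-bit
 * lanes; this broadcasts the upper half of __v into both halves of the
 * result. */
static __inline__ __m256 __DEFAULT_FN_ATTRS
__example_broadcast_high(__m256 __v)
{
  __m128 __hi = _mm256_extractf32x4_ps(__v, 1);
  return _mm256_insertf32x4(__v, __hi, 0);
}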
#define _mm_getmant_pd(A, B, C) __extension__({\
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1); })

#define _mm_mask_getmant_pd(W, U, A, B, C) __extension__({\
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)); })

#define _mm_maskz_getmant_pd(U, A, B, C) __extension__({\
  (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)); })

#define _mm256_getmant_pd(A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1); })

#define _mm256_mask_getmant_pd(W, U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_getmant_pd(U, A, B, C) __extension__ ({ \
  (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)); })

#define _mm_getmant_ps(A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1); })

#define _mm_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)); })

#define _mm_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)); })

#define _mm256_getmant_ps(A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1); })

#define _mm256_mask_getmant_ps(W, U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)); })

#define _mm256_maskz_getmant_ps(U, A, B, C) __extension__ ({ \
  (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)); })
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (__v2di)(__m128i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (__v4di)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                        (double const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                        (long long const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                       (float const *)(addr), \
                                       (__v4si)(__m128i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                       (float const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale)); })

#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) __extension__ ({\
  (__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                        (int const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)); })
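/* Usage sketch (illustrative only): masked gather. Lanes not selected by
 * __m keep __old; the scale is the element stride in bytes (8 for double)
 * and must be a compile-time constant. */
static __inline__ __m128d __DEFAULT_FN_ATTRS
__example_masked_gather_pd(__m128d __old, __mmask8 __m, __m128i __idx,
                           const double *__base)
{
  return _mm_mmask_i64gather_pd(__old, __m, __idx, __base, 8);
}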
#define _mm256_permutex_pd(X, C) __extension__ ({ \
  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(X), \
                                   (__v4df)_mm256_undefined_pd(), \
                                   ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
                                   ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })

#define _mm256_mask_permutex_pd(W, U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permutex_pd((X), (C)), \
                                       (__v4df)(__m256d)(W)); })

#define _mm256_maskz_permutex_pd(U, X, C) __extension__ ({ \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_permutex_pd((X), (C)), \
                                       (__v4df)_mm256_setzero_pd()); })

#define _mm256_permutex_epi64(X, C) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(X), \
                                   (__v4di)_mm256_undefined_si256(), \
                                   ((C) >> 0) & 0x3, ((C) >> 2) & 0x3, \
                                   ((C) >> 4) & 0x3, ((C) >> 6) & 0x3); })

#define _mm256_mask_permutex_epi64(W, U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_permutex_epi64((X), (C)), \
                                      (__v4di)(__m256i)(W)); })

#define _mm256_maskz_permutex_epi64(U, X, C) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_permutex_epi64((X), (C)), \
                                      (__v4di)_mm256_setzero_si256()); })
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd(__m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_permvardf256_mask((__v4df)__Y, (__v4di)__X, (__v4df)_mm256_setzero_pd(), (__mmask8)-1); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_permvardf256_mask((__v4df)__Y, (__v4di)__X, (__v4df)__W, (__mmask8)__U); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_permvardf256_mask((__v4df)__Y, (__v4di)__X, (__v4df)_mm256_setzero_pd(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvardi256_mask((__v4di)__Y, (__v4di)__X, (__v4di)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvardi256_mask((__v4di)__Y, (__v4di)__X, (__v4di)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvardi256_mask((__v4di)__Y, (__v4di)__X, (__v4di)_mm256_setzero_si256(), (__mmask8)__U); }

static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutexvar_ps(__m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_permvarsf256_mask((__v8sf)__Y, (__v8si)__X, (__v8sf)_mm256_setzero_ps(), (__mmask8)-1); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_permvarsf256_mask((__v8sf)__Y, (__v8si)__X, (__v8sf)__W, (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_permvarsf256_mask((__v8sf)__Y, (__v8si)__X, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); }

static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvarsi256_mask((__v8si)__Y, (__v8si)__X, (__v8si)_mm256_setzero_si256(), (__mmask8)-1); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvarsi256_mask((__v8si)__Y, (__v8si)__X, (__v8si)__W, (__mmask8)__U); }
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvarsi256_mask((__v8si)__Y, (__v8si)__X, (__v8si)_mm256_setzero_si256(), (__mmask8)__U); }
#define _mm_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \
                                   (__v4si)(__m128i)(A), \
                                   ((int)(imm) & 0x3) + 0, \
                                   ((int)(imm) & 0x3) + 1, \
                                   ((int)(imm) & 0x3) + 2, \
                                   ((int)(imm) & 0x3) + 3); })

#define _mm_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                      (__v4si)(__m128i)(W)); })

#define _mm_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                      (__v4si)_mm_setzero_si128()); })

#define _mm256_alignr_epi32(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \
                                   (__v8si)(__m256i)(A), \
                                   ((int)(imm) & 0x7) + 0, \
                                   ((int)(imm) & 0x7) + 1, \
                                   ((int)(imm) & 0x7) + 2, \
                                   ((int)(imm) & 0x7) + 3, \
                                   ((int)(imm) & 0x7) + 4, \
                                   ((int)(imm) & 0x7) + 5, \
                                   ((int)(imm) & 0x7) + 6, \
                                   ((int)(imm) & 0x7) + 7); })

#define _mm256_mask_alignr_epi32(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                      (__v8si)(__m256i)(W)); })

#define _mm256_maskz_alignr_epi32(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                      (__v8si)_mm256_setzero_si256()); })

#define _mm_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \
                                   (__v2di)(__m128i)(A), \
                                   ((int)(imm) & 0x1) + 0, \
                                   ((int)(imm) & 0x1) + 1); })

#define _mm_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                      (__v2di)(__m128i)(W)); })

#define _mm_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                      (__v2di)_mm_setzero_di()); })

#define _mm256_alignr_epi64(A, B, imm) __extension__ ({ \
  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \
                                   (__v4di)(__m256i)(A), \
                                   ((int)(imm) & 0x3) + 0, \
                                   ((int)(imm) & 0x3) + 1, \
                                   ((int)(imm) & 0x3) + 2, \
                                   ((int)(imm) & 0x3) + 3); })

#define _mm256_mask_alignr_epi64(W, U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                      (__v4di)(__m256i)(W)); })

#define _mm256_maskz_alignr_epi64(U, A, B, imm) __extension__ ({ \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                      (__v4di)_mm256_setzero_si256()); })
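/* Usage sketch (illustrative only): alignr concatenates {A,B} element-wise
 * and shifts right; with imm = 1 the result is { b1, b2, b3, a0 }. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
__example_alignr(__m128i __a, __m128i __b)
{
  return _mm_alignr_epi32(__a, __b, 1);
}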
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)__W); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)_mm_setzero_ps()); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)__W); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)_mm256_setzero_ps()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)__W); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)_mm_setzero_ps()); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)__W); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)_mm256_setzero_ps()); }
#define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                      (__v8si)(__m256i)(W)); })

#define _mm256_maskz_shuffle_epi32(U, A, I) __extension__({\
  (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                      (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                      (__v8si)_mm256_setzero_si256()); })

#define _mm_mask_shuffle_epi32(W, U, A, I) __extension__({\
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shuffle_epi32((A), (I)), \
                                      (__v4si)(__m128i)(W)); })

#define _mm_maskz_shuffle_epi32(U, A, I) __extension__({\
  (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                      (__v4si)_mm_shuffle_epi32((A), (I)), \
                                      (__v4si)_mm_setzero_si128()); })
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A, (__v2df)__W); }
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mov_pd(__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)__A, (__v2df)_mm_setzero_pd()); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A, (__v4df)__W); }
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)__A, (__v4df)_mm256_setzero_pd()); }

static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A, (__v4sf)__W); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mov_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)__A, (__v4sf)_mm_setzero_ps()); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A, (__v8sf)__W); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)__A, (__v8sf)_mm256_setzero_ps()); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_vcvtph2ps_mask((__v8hi)__A, (__v4sf)__W, (__mmask8)__U); }
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_vcvtph2ps_mask((__v8hi)__A, (__v4sf)_mm_setzero_ps(), (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A) { return (__m256)__builtin_ia32_vcvtph2ps256_mask((__v8hi)__A, (__v8sf)__W, (__mmask8)__U); }
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) { return (__m256)__builtin_ia32_vcvtph2ps256_mask((__v8hi)__A, (__v8sf)_mm256_setzero_ps(), (__mmask8)__U); }

#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                         (__v8hi)(__m128i)(W), \
                                         (__mmask8)(U)); })

#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                         (__v8hi)_mm_setzero_si128(), \
                                         (__mmask8)(U)); })

#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                            (__v8hi)(__m128i)(W), \
                                            (__mmask8)(U)); })

#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                            (__v8hi)_mm_setzero_si128(), \
                                            (__mmask8)(U)); })
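/* Usage sketch (illustrative only): convert four floats to half precision.
 * The rounding-control immediate must be a constant; _MM_FROUND_CUR_DIRECTION
 * rounds according to MXCSR. Mask 0xF keeps all four result lanes. */
static __inline__ __m128i __DEFAULT_FN_ATTRS
__example_to_half(__m128 __v)
{
  return _mm_maskz_cvt_roundps_ph(0xF, __v, _MM_FROUND_CUR_DIRECTION);
}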
#undef __DEFAULT_FN_ATTRS
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi64(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcastss_ps(__m128 __X)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
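For instance, an immediate left shift is a cheap multiply by a power of two (illustrative helper name):

#include <immintrin.h>

/* Each 32-bit lane becomes x << 3, i.e. x * 8; bits shifted out are lost. */
static __m128i times_eight(__m128i x)
{
  return _mm_slli_epi32(x, 3);
}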
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epi64_mask(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi64(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c)
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector oper...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srav_epi32(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu32_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sllv_epi32(__m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcast_i32x4(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srlv_epi64(__m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi32(__m128i __V)
static __inline __m128 __DEFAULT_FN_ATTRS _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vecto...
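Only the low two bits of each 32-bit control element are consulted, so a control vector holding the indices {3,2,1,0} reverses the element order (sketch, illustrative helper name):

#include <immintrin.h>

/* Lane i of the result is a[control[i] & 3]; this control reverses the lanes. */
static __m128 reverse_ps(__m128 a)
{
  return _mm_permutevar_ps(a, _mm_set_epi32(0, 1, 2, 3));
}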
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi64(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
static __inline __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_abs_epi32(__m256i __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double]...
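A small sketch (helper name illustrative):

#include <immintrin.h>

/* Element-wise square root of {x0, x1}; a negative input yields a NaN lane. */
static __m128d sqrt2(double x0, double x1)
{
  return _mm_sqrt_pd(_mm_set_pd(x1, x0)); /* _mm_set_pd takes the high element first */
}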
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastq_epi64(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values...
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a)
Moves and duplicates low-order (even-indexed) values from a 256-bit vector of [8 x float] to float va...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srlv_epi32(__m128i __X, __m128i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
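Only the lower two integer elements participate (sketch, illustrative helper):

#include <immintrin.h>

/* Widens elements 0 and 1 of the i32 vector to double; elements 2 and 3 are ignored. */
static __m128d low2_to_double(__m128i v)
{
  return _mm_cvtepi32_pd(v);
}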
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition. ...
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
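Sign extension preserves negative values, which distinguishes this from the epu16 variant (sketch, illustrative helper):

#include <immintrin.h>

/* Widens the two lowest 16-bit elements to 64 bits with sign extension:
   an input lane of -1 (0xFFFF) stays -1, rather than becoming 65535. */
static __m128i widen_s16_to_s64(__m128i v)
{
  return _mm_cvtepi16_epi64(v);
}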
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi32(__m256i __a, __m256i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
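The "saving the lower 32 bits" wording means the addition is modular; a sketch with an illustrative helper:

#include <immintrin.h>

/* Lane-wise 32-bit addition; overflow wraps modulo 2^32 (no saturation). */
static __m128i add4(__m128i a, __m128i b)
{
  return _mm_add_epi32(a, b);
}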
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epi64_mask(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi8_epi64(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_ps(__mmask16 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mullo_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them...
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epu64_mask(__m128i __a, __m128i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
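Note the per-128-bit-lane behavior of the AVX form (sketch, illustrative helper name):

#include <immintrin.h>

/* Result is { a2,b2,a3,b3 | a6,b6,a7,b7 }: the interleave happens within
   each 128-bit half, not across the whole 256-bit register. */
static __m256 hi_interleave(__m256 a, __m256 b)
{
  return _mm256_unpackhi_ps(a, b);
}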
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order (even-indexed) values from two 128-bit vectors of [2 x double] and interleaves ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline __m128i __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) _mm_setzero_di(void)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp14_ps(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutexvar_epi32(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmplt_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epu32(__m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
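Sketch (illustrative helper); the one edge case worth remembering is INT32_MIN:

#include <immintrin.h>

/* |x| per 32-bit lane; INT32_MIN has no positive counterpart and maps to itself. */
static __m128i abs4(__m128i x)
{
  return _mm_abs_epi32(x);
}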
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi32(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a)
Duplicates low-order (even-indexed) values from a 128-bit vector of [4 x float] to float values store...
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_getexp_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastq_epi64(__m128i __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_ps(__mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcastd_epi32(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values...
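Sketch (illustrative helper); like the MAXPS instruction it compiles to, the operation is not symmetric in the presence of NaNs:

#include <immintrin.h>

/* Lane-wise maximum; when a pair is unordered (NaN involved),
   the lane from b is returned. */
static __m128 max4(__m128 a, __m128 b)
{
  return _mm_max_ps(a, b);
}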
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sll_epi32(__m256i __a, __m128i __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_andnot_si256(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt14_ps(__m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Creates a 256-bit integer vector with undefined values.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi32(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepu32_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiplies the corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
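Sketch (illustrative helper):

#include <immintrin.h>

/* Full 32x32->64-bit products are formed internally, but only the low
   32 bits of each product are kept, so the result wraps modulo 2^32. */
static __m128i mul_lo(__m128i a, __m128i b)
{
  return _mm_mullo_epi32(a, b);
}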
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_broadcastss_ps(__m128 __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_pd(__m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpneq_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi16_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpgt_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_and_si256(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sllv_epi64(__m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi32(__m256i __a, int __count)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
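The contrast with the logical shift is the fill bit (sketch, illustrative helper):

#include <immintrin.h>

/* Arithmetic shift: vacated bits are filled with the sign bit, so negative
   lanes stay negative (contrast _mm_srli_epi32, which fills with zeros). */
static __m128i sar2(__m128i x)
{
  return _mm_srai_epi32(x, 2);
}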
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_add_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating-point values from a 256-bit vector of [4 x double] to ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
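A hedged sketch of the three masking forms this header builds around such operations (values illustrative): the unmasked add, the merge-masked _mm_mask_add_epi64 that preserves lanes of __W where the mask bit is 0, and the zero-masked _mm_maskz_add_epi64 that zeroes those lanes instead.

#include <immintrin.h>

static void add_epi64_forms(void) {
  __m128i a = _mm_set1_epi64x(10);
  __m128i b = _mm_set1_epi64x(32);
  __m128i w = _mm_set1_epi64x(-1);
  __m128i r0 = _mm_add_epi64(a, b);               /* {42, 42} */
  __m128i r1 = _mm_mask_add_epi64(w, 0x1, a, b);  /* {42, -1}: lane 1 kept from w */
  __m128i r2 = _mm_maskz_add_epi64(0x1, a, b);    /* {42, 0}: lane 1 zeroed */
  (void)r0; (void)r1; (void)r2;
}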
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
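A small sketch (illustrative values) of the immediate-count shift and its merge-masked companion _mm_mask_slli_epi64 from this header, which only shifts the lanes selected by the mask.

#include <immintrin.h>

static void slli_epi64_example(void) {
  __m128i a = _mm_set1_epi64x(3);
  __m128i s = _mm_slli_epi64(a, 4);                /* {48, 48} */
  __m128i w = _mm_setzero_si128();
  __m128i sm = _mm_mask_slli_epi64(w, 0x2, a, 4);  /* {0, 48}: only lane 1 computed */
  (void)s; (void)sm;
}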
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-...
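A sketch of the widening multiply (illustrative values): only the even-indexed 32-bit elements participate, and each signed product occupies a full 64-bit lane of the result.

#include <immintrin.h>

static void mul_epi32_example(void) {
  __m128i a = _mm_set_epi32(9, 3, 9, -2);  /* elements 0..3 = -2, 9, 3, 9 */
  __m128i b = _mm_set_epi32(9, 4, 9, 5);   /* elements 0..3 =  5, 9, 4, 9 */
  __m128i p = _mm_mul_epi32(a, b);         /* [2 x i64] = {-2*5, 3*4} = {-10, 12} */
  (void)p;
}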
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epi64_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi16(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_broadcastsd_pd(__m128d __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpeq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epu32(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epi64_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi64(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
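A short sketch (illustrative values) of the interleave: the result holds element 0 of the first operand followed by element 0 of the second.

#include <immintrin.h>

static void unpacklo_epi64_example(void) {
  __m128i a = _mm_set_epi64x(111, 1);    /* element 0 = 1 */
  __m128i b = _mm_set_epi64x(222, 2);    /* element 0 = 2 */
  __m128i r = _mm_unpacklo_epi64(a, b);  /* {1, 2} */
  (void)r;
}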
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epi64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srlv_epi32(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves the...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpeq_epi32_mask(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values...
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order (odd-indexed) values from two 128-bit vectors of [2 x double] and interleaves ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_slli_epi64(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mul_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_rcp14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi64(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmplt_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
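A sketch of the arithmetic right shift (illustrative values); the count is taken from the low 64 bits of the second operand, and vacated positions are filled with the sign bit.

#include <immintrin.h>

static void sra_epi32_example(void) {
  __m128i a = _mm_set_epi32(-16, 16, -8, 8);  /* elements 0..3 = 8, -8, 16, -16 */
  __m128i count = _mm_cvtsi32_si128(2);
  __m128i r = _mm_sra_epi32(a, count);        /* elements 0..3 = 2, -2, 4, -4 */
  (void)r;
}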
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpneq_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcastd_epi32(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi64(__m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi32(__m128i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
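A sketch pairing the classic whole-register AND with the per-lane merge-masked _mm_mask_and_epi32 from this header (values illustrative).

#include <immintrin.h>

static void and_example(void) {
  __m128i a = _mm_set1_epi32(0x0FF0);
  __m128i b = _mm_set1_epi32(0x00FF);
  __m128i r = _mm_and_si128(a, b);               /* 0x00F0 in every lane */
  __m128i w = _mm_set1_epi32(-1);
  __m128i rm = _mm_mask_and_epi32(w, 0x3, a, b); /* lanes 2-3 kept from w */
  (void)r; (void)rm;
}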
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline __m128d __DEFAULT_FN_ATTRS _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector oper...
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi16(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_cvtepu32_pd(__m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu32(__m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
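A sketch of the lane-wise minimum and its merge-masked form _mm_mask_min_pd (values illustrative).

#include <immintrin.h>

static void min_pd_example(void) {
  __m128d a = _mm_set_pd(4.0, 1.0);            /* element 0 = 1.0 */
  __m128d b = _mm_set_pd(3.0, 2.0);            /* element 0 = 2.0 */
  __m128d r = _mm_min_pd(a, b);                /* {1.0, 3.0} */
  __m128d w = _mm_setzero_pd();
  __m128d rm = _mm_mask_min_pd(w, 0x1, a, b);  /* {1.0, 0.0} */
  (void)r; (void)rm;
}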
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_xor_si256(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpeq_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epu32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srai_epi32(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi64(__m128i __A, int __imm)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_scalef_ps(__m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
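A sketch contrasting the exact square root with the zero-masked approximate reciprocal square root _mm256_maskz_rsqrt14_ps from this header, whose relative error is documented as bounded by roughly 2^-14 (values illustrative).

#include <immintrin.h>

static void sqrt_ps_example(void) {
  __m256 a = _mm256_set1_ps(9.0f);
  __m256 r = _mm256_sqrt_ps(a);                  /* 3.0f in every lane */
  __m256 ra = _mm256_maskz_rsqrt14_ps(0xFF, a);  /* ~0.3333f in every lane */
  (void)r; (void)ra;
}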
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_rcp14_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srav_epi32(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epi64_mask(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order (odd-indexed) values from two 128-bit vectors of [2 x i64] and interleaves the...
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi64(__m128i __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_or_si256(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi64(__m128i __A, __m128i __B)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi64(__m128i __V)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vecto...
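A sketch (illustrative values): each 32-bit control element selects, via its low two bits, one element from within the same 128-bit half of the source.

#include <immintrin.h>

static void permutevar_ps_example(void) {
  __m256 a = _mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0);  /* element i holds i */
  __m256i c = _mm256_set1_epi32(3);                  /* pick element 3 of each half */
  __m256 r = _mm256_permutevar_ps(a, c);             /* {3,3,3,3, 7,7,7,7} */
  (void)r;
}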
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epi32_mask(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpge_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epu64_mask(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a)
Moves and duplicates high-order (odd-indexed) values from a 128-bit vector of [4 x float] to float va...
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_cvtepi32_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epu64_mask(__m256i __a, __m256i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
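A sketch (illustrative values) of the duplication and its zero-masked companion _mm_maskz_movedup_pd from this header.

#include <immintrin.h>

static void movedup_pd_example(void) {
  __m128d a = _mm_set_pd(9.0, 1.0);           /* element 0 = 1.0 */
  __m128d r = _mm_movedup_pd(a);              /* {1.0, 1.0} */
  __m128d rz = _mm_maskz_movedup_pd(0x1, a);  /* {1.0, 0.0} */
  (void)r; (void)rz;
}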
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_permutexvar_ps(__m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
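A sketch of the sign extension (illustrative values), together with the zero-masked _mm_maskz_cvtepi8_epi32 form from this header.

#include <immintrin.h>

static void cvtepi8_epi32_example(void) {
  __m128i v = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
                           0, 0, 0, 0, 4, -3, 2, -1);  /* low bytes: -1, 2, -3, 4 */
  __m128i r = _mm_cvtepi8_epi32(v);                    /* {-1, 2, -3, 4} */
  __m128i rz = _mm_maskz_cvtepi8_epi32(0x5, v);        /* {-1, 0, -3, 0} */
  (void)r; (void)rz;
}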
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
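A sketch of the widening conversion (illustrative values), with the zero-masked _mm256_maskz_cvtepi32_pd form from this header alongside.

#include <immintrin.h>

static void cvtepi32_pd_example(void) {
  __m128i v = _mm_set_epi32(4, 3, 2, 1);          /* element 0 = 1 */
  __m256d d = _mm256_cvtepi32_pd(v);              /* {1.0, 2.0, 3.0, 4.0} */
  __m256d dz = _mm256_maskz_cvtepi32_pd(0x3, v);  /* {1.0, 2.0, 0.0, 0.0} */
  (void)d; (void)dz;
}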
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmple_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epi64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpge_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpge_epi32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epu64_mask(__m256i __a, __m256i __b)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] ...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmplt_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sub_epi32(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
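A sketch (illustrative values) contrasting whole-register zeroing with the zero-masked move _mm256_maskz_mov_epi32 from this header, which zeroes only the deselected lanes.

#include <immintrin.h>

static void zeroing_example(void) {
  __m256i z = _mm256_setzero_si256();           /* all 256 bits clear */
  __m256i a = _mm256_set1_epi32(7);
  __m256i lo = _mm256_maskz_mov_epi32(0x0F, a); /* lanes 0-3 = 7, lanes 4-7 = 0 */
  (void)z; (void)lo;
}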
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi32(__m256i __a, __m128i __count)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpgt_epu32_mask(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sllv_epi64(__m256i __X, __m256i __Y)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a)
Moves and duplicates high-order (odd-indexed) values from a 256-bit vector of [8 x float] to float va...
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_sra_epi32(__m256i __a, __m128i __count)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mul_epu32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmple_epu64_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srai_epi64(__m256i __A, int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srl_epi64(__m256i __a, __m128i __count)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu8_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
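A minimal sketch of the element-wise product (SSE2; the helper name is illustrative):

    #include <immintrin.h>

    /* Result lanes are {a0*s, a1*s}: each pair of corresponding
       elements is multiplied independently. */
    static __m128d scale_pair(__m128d a, double s)
    {
        return _mm_mul_pd(a, _mm_set1_pd(s));
    }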
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srlv_epi64(__m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
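A typical use is seeding an accumulator; a sketch assuming AVX and a length that is a multiple of 4 (the function name is hypothetical):

    #include <immintrin.h>
    #include <stddef.h>

    /* Sum an array of doubles four lanes at a time. */
    static double sum4(const double *p, size_t n)
    {
        __m256d acc = _mm256_setzero_pd();            /* {0, 0, 0, 0} */
        for (size_t i = 0; i < n; i += 4)
            acc = _mm256_add_pd(acc, _mm256_loadu_pd(p + i));
        double lanes[4];
        _mm256_storeu_pd(lanes, acc);
        return lanes[0] + lanes[1] + lanes[2] + lanes[3];
    }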
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
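A sketch of a lane-wise ReLU built on it (AVX; the name is hypothetical):

    #include <immintrin.h>

    /* max(v, 0) independently in each of the four double lanes. */
    static __m256d relu4(__m256d v)
    {
        return _mm256_max_pd(v, _mm256_setzero_pd());
    }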
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpgt_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_getexp_ps(__m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmple_epu64_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpeq_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_getexp_ps(__m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmpneq_epu32_mask(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmpgt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_mask_cmpneq_epi32_mask(__mmask8 __u, __m256i __a, __m256i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_mask_cmplt_epu32_mask(__mmask8 __u, __m128i __a, __m128i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_getexp_pd(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_cvtepu16_epi32(__m128i __V)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm_cmpge_epi64_mask(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values contained in the first source operand.
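Note the operand order: the complement applies to the first argument, so the result is (~a) & b. A minimal sketch (SSE2; the name is hypothetical):

    #include <immintrin.h>

    /* Clear in v every bit that is set in mask: (~mask) & v. */
    static __m128i clear_masked_bits(__m128i mask, __m128i v)
    {
        return _mm_andnot_si128(mask, v);
    }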
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
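Combined with _mm256_max_pd above, this gives a lane-wise clamp; a sketch (AVX; the name is hypothetical):

    #include <immintrin.h>

    /* Clamp each lane of v into [lo, hi]. */
    static __m256d clamp4(__m256d v, __m256d lo, __m256d hi)
    {
        return _mm256_min_pd(_mm256_max_pd(v, lo), hi);
    }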
static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_srli_epi32(__m256i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS _mm256_cmple_epi64_mask(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)