11 #error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead." 14 #ifndef __AVX512VLBWINTRIN_H 15 #define __AVX512VLBWINTRIN_H 18 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128))) 19 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256))) 23 #define _mm_cmp_epi8_mask(a, b, p) \ 24 (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 25 (__v16qi)(__m128i)(b), (int)(p), \ 28 #define _mm_mask_cmp_epi8_mask(m, a, b, p) \ 29 (__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 30 (__v16qi)(__m128i)(b), (int)(p), \ 33 #define _mm_cmp_epu8_mask(a, b, p) \ 34 (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 35 (__v16qi)(__m128i)(b), (int)(p), \ 38 #define _mm_mask_cmp_epu8_mask(m, a, b, p) \ 39 (__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 40 (__v16qi)(__m128i)(b), (int)(p), \ 43 #define _mm256_cmp_epi8_mask(a, b, p) \ 44 (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 45 (__v32qi)(__m256i)(b), (int)(p), \ 48 #define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ 49 (__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 50 (__v32qi)(__m256i)(b), (int)(p), \ 53 #define _mm256_cmp_epu8_mask(a, b, p) \ 54 (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 55 (__v32qi)(__m256i)(b), (int)(p), \ 58 #define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ 59 (__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 60 (__v32qi)(__m256i)(b), (int)(p), \ 63 #define _mm_cmp_epi16_mask(a, b, p) \ 64 (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 65 (__v8hi)(__m128i)(b), (int)(p), \ 68 #define _mm_mask_cmp_epi16_mask(m, a, b, p) \ 69 (__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 70 (__v8hi)(__m128i)(b), (int)(p), \ 73 #define _mm_cmp_epu16_mask(a, b, p) \ 74 
(__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 75 (__v8hi)(__m128i)(b), (int)(p), \ 78 #define _mm_mask_cmp_epu16_mask(m, a, b, p) \ 79 (__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 80 (__v8hi)(__m128i)(b), (int)(p), \ 83 #define _mm256_cmp_epi16_mask(a, b, p) \ 84 (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 85 (__v16hi)(__m256i)(b), (int)(p), \ 88 #define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ 89 (__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 90 (__v16hi)(__m256i)(b), (int)(p), \ 93 #define _mm256_cmp_epu16_mask(a, b, p) \ 94 (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 95 (__v16hi)(__m256i)(b), (int)(p), \ 98 #define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ 99 (__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 100 (__v16hi)(__m256i)(b), (int)(p), \ 103 #define _mm_cmpeq_epi8_mask(A, B) \ 104 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 105 #define _mm_mask_cmpeq_epi8_mask(k, A, B) \ 106 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 107 #define _mm_cmpge_epi8_mask(A, B) \ 108 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 109 #define _mm_mask_cmpge_epi8_mask(k, A, B) \ 110 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 111 #define _mm_cmpgt_epi8_mask(A, B) \ 112 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 113 #define _mm_mask_cmpgt_epi8_mask(k, A, B) \ 114 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 115 #define _mm_cmple_epi8_mask(A, B) \ 116 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 117 #define _mm_mask_cmple_epi8_mask(k, A, B) \ 118 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 119 #define _mm_cmplt_epi8_mask(A, B) \ 120 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 121 #define _mm_mask_cmplt_epi8_mask(k, A, B) \ 122 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 123 #define _mm_cmpneq_epi8_mask(A, B) \ 124 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 125 #define _mm_mask_cmpneq_epi8_mask(k, A, B) \ 126 _mm_mask_cmp_epi8_mask((k), 
(A), (B), _MM_CMPINT_NE) 128 #define _mm256_cmpeq_epi8_mask(A, B) \ 129 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 130 #define _mm256_mask_cmpeq_epi8_mask(k, A, B) \ 131 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 132 #define _mm256_cmpge_epi8_mask(A, B) \ 133 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 134 #define _mm256_mask_cmpge_epi8_mask(k, A, B) \ 135 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 136 #define _mm256_cmpgt_epi8_mask(A, B) \ 137 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 138 #define _mm256_mask_cmpgt_epi8_mask(k, A, B) \ 139 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 140 #define _mm256_cmple_epi8_mask(A, B) \ 141 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 142 #define _mm256_mask_cmple_epi8_mask(k, A, B) \ 143 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 144 #define _mm256_cmplt_epi8_mask(A, B) \ 145 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 146 #define _mm256_mask_cmplt_epi8_mask(k, A, B) \ 147 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 148 #define _mm256_cmpneq_epi8_mask(A, B) \ 149 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 150 #define _mm256_mask_cmpneq_epi8_mask(k, A, B) \ 151 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 153 #define _mm_cmpeq_epu8_mask(A, B) \ 154 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 155 #define _mm_mask_cmpeq_epu8_mask(k, A, B) \ 156 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 157 #define _mm_cmpge_epu8_mask(A, B) \ 158 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 159 #define _mm_mask_cmpge_epu8_mask(k, A, B) \ 160 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 161 #define _mm_cmpgt_epu8_mask(A, B) \ 162 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 163 #define _mm_mask_cmpgt_epu8_mask(k, A, B) \ 164 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 165 #define _mm_cmple_epu8_mask(A, B) \ 166 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 167 #define _mm_mask_cmple_epu8_mask(k, A, B) \ 168 _mm_mask_cmp_epu8_mask((k), (A), 
(B), _MM_CMPINT_LE) 169 #define _mm_cmplt_epu8_mask(A, B) \ 170 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 171 #define _mm_mask_cmplt_epu8_mask(k, A, B) \ 172 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 173 #define _mm_cmpneq_epu8_mask(A, B) \ 174 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 175 #define _mm_mask_cmpneq_epu8_mask(k, A, B) \ 176 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 178 #define _mm256_cmpeq_epu8_mask(A, B) \ 179 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 180 #define _mm256_mask_cmpeq_epu8_mask(k, A, B) \ 181 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 182 #define _mm256_cmpge_epu8_mask(A, B) \ 183 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 184 #define _mm256_mask_cmpge_epu8_mask(k, A, B) \ 185 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 186 #define _mm256_cmpgt_epu8_mask(A, B) \ 187 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 188 #define _mm256_mask_cmpgt_epu8_mask(k, A, B) \ 189 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 190 #define _mm256_cmple_epu8_mask(A, B) \ 191 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 192 #define _mm256_mask_cmple_epu8_mask(k, A, B) \ 193 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 194 #define _mm256_cmplt_epu8_mask(A, B) \ 195 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 196 #define _mm256_mask_cmplt_epu8_mask(k, A, B) \ 197 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 198 #define _mm256_cmpneq_epu8_mask(A, B) \ 199 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 200 #define _mm256_mask_cmpneq_epu8_mask(k, A, B) \ 201 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 203 #define _mm_cmpeq_epi16_mask(A, B) \ 204 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 205 #define _mm_mask_cmpeq_epi16_mask(k, A, B) \ 206 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 207 #define _mm_cmpge_epi16_mask(A, B) \ 208 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 209 #define _mm_mask_cmpge_epi16_mask(k, A, B) \ 210 _mm_mask_cmp_epi16_mask((k), 
(A), (B), _MM_CMPINT_GE) 211 #define _mm_cmpgt_epi16_mask(A, B) \ 212 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 213 #define _mm_mask_cmpgt_epi16_mask(k, A, B) \ 214 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 215 #define _mm_cmple_epi16_mask(A, B) \ 216 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 217 #define _mm_mask_cmple_epi16_mask(k, A, B) \ 218 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 219 #define _mm_cmplt_epi16_mask(A, B) \ 220 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 221 #define _mm_mask_cmplt_epi16_mask(k, A, B) \ 222 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 223 #define _mm_cmpneq_epi16_mask(A, B) \ 224 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 225 #define _mm_mask_cmpneq_epi16_mask(k, A, B) \ 226 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 228 #define _mm256_cmpeq_epi16_mask(A, B) \ 229 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 230 #define _mm256_mask_cmpeq_epi16_mask(k, A, B) \ 231 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 232 #define _mm256_cmpge_epi16_mask(A, B) \ 233 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 234 #define _mm256_mask_cmpge_epi16_mask(k, A, B) \ 235 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 236 #define _mm256_cmpgt_epi16_mask(A, B) \ 237 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 238 #define _mm256_mask_cmpgt_epi16_mask(k, A, B) \ 239 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 240 #define _mm256_cmple_epi16_mask(A, B) \ 241 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 242 #define _mm256_mask_cmple_epi16_mask(k, A, B) \ 243 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 244 #define _mm256_cmplt_epi16_mask(A, B) \ 245 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 246 #define _mm256_mask_cmplt_epi16_mask(k, A, B) \ 247 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 248 #define _mm256_cmpneq_epi16_mask(A, B) \ 249 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 250 #define _mm256_mask_cmpneq_epi16_mask(k, A, B) \ 
251 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 253 #define _mm_cmpeq_epu16_mask(A, B) \ 254 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 255 #define _mm_mask_cmpeq_epu16_mask(k, A, B) \ 256 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 257 #define _mm_cmpge_epu16_mask(A, B) \ 258 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 259 #define _mm_mask_cmpge_epu16_mask(k, A, B) \ 260 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 261 #define _mm_cmpgt_epu16_mask(A, B) \ 262 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 263 #define _mm_mask_cmpgt_epu16_mask(k, A, B) \ 264 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 265 #define _mm_cmple_epu16_mask(A, B) \ 266 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 267 #define _mm_mask_cmple_epu16_mask(k, A, B) \ 268 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 269 #define _mm_cmplt_epu16_mask(A, B) \ 270 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 271 #define _mm_mask_cmplt_epu16_mask(k, A, B) \ 272 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 273 #define _mm_cmpneq_epu16_mask(A, B) \ 274 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 275 #define _mm_mask_cmpneq_epu16_mask(k, A, B) \ 276 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 278 #define _mm256_cmpeq_epu16_mask(A, B) \ 279 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 280 #define _mm256_mask_cmpeq_epu16_mask(k, A, B) \ 281 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 282 #define _mm256_cmpge_epu16_mask(A, B) \ 283 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 284 #define _mm256_mask_cmpge_epu16_mask(k, A, B) \ 285 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 286 #define _mm256_cmpgt_epu16_mask(A, B) \ 287 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 288 #define _mm256_mask_cmpgt_epu16_mask(k, A, B) \ 289 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 290 #define _mm256_cmple_epu16_mask(A, B) \ 291 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 292 #define 
_mm256_mask_cmple_epu16_mask(k, A, B) \ 293 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 294 #define _mm256_cmplt_epu16_mask(A, B) \ 295 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 296 #define _mm256_mask_cmplt_epu16_mask(k, A, B) \ 297 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 298 #define _mm256_cmpneq_epu16_mask(A, B) \ 299 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 300 #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ 301 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 305 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
312 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
319 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
326 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
333 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
340 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
347 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
354 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
361 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
368 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
375 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
382 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
389 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
396 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
403 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
410 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
417 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
424 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
431 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
438 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
446 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
454 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
462 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
470 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
478 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
486 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
494 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
502 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
510 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
518 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
526 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
534 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
541 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
549 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
557 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
565 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
573 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
581 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
589 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
597 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
605 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
613 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
621 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
629 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
637 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
645 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
653 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
661 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
669 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
677 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
685 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
693 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
701 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
709 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
717 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
725 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
733 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
741 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
749 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
757 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
765 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
773 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
781 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
789 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
797 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
805 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
813 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
821 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
829 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
837 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
845 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
853 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
861 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
869 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
877 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
885 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
893 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
901 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
909 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
917 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
925 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
933 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
941 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
949 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
957 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
965 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
973 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
981 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
989 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
997 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1005 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1013 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1021 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1029 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1037 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1045 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1053 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1061 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1069 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1077 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__M,
1085 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1093 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
1101 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1109 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
1117 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1125 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1133 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1141 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1149 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1157 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1165 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1173 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1181 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1189 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1197 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1205 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1213 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1221 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1229 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1237 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1245 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1253 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1261 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1269 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1277 return (__m128i)__builtin_ia32_vpermi2varhi128((
__v8hi)__A, (
__v8hi)__I,
1285 return (__m128i)__builtin_ia32_selectw_128(__U,
1294 return (__m128i)__builtin_ia32_selectw_128(__U,
1303 return (__m128i)__builtin_ia32_selectw_128(__U,
1311 return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
1319 return (__m256i)__builtin_ia32_selectw_256(__U,
1328 return (__m256i)__builtin_ia32_selectw_256(__U,
1337 return (__m256i)__builtin_ia32_selectw_256(__U,
1344 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1351 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1359 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1366 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1373 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1380 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1387 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
1394 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
1401 return (__m128i) __builtin_ia32_pmovswb128_mask ((
__v8hi) __A,
1408 return (__m128i) __builtin_ia32_pmovswb128_mask ((
__v8hi) __A,
1415 return (__m128i) __builtin_ia32_pmovswb128_mask ((
__v8hi) __A,
1422 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1429 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1436 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
1443 return (__m128i) __builtin_ia32_pmovuswb128_mask ((
__v8hi) __A,
1450 return (__m128i) __builtin_ia32_pmovuswb128_mask ((
__v8hi) __A,
1457 return (__m128i) __builtin_ia32_pmovuswb128_mask ((
__v8hi) __A,
1464 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1471 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1478 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
1485 return (__m128i)__builtin_shufflevector(
1486 __builtin_convertvector((
__v8hi)__A, __v8qi),
1487 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1493 return (__m128i) __builtin_ia32_pmovwb128_mask ((
__v8hi) __A,
1500 return (__m128i) __builtin_ia32_pmovwb128_mask ((
__v8hi) __A,
1508 __builtin_ia32_pmovwb128mem_mask ((
__v16qi *) __P, (
__v8hi) __A, __M);
1515 __builtin_ia32_pmovswb128mem_mask ((
__v16qi *) __P, (
__v8hi) __A, __M);
1521 __builtin_ia32_pmovuswb128mem_mask ((
__v16qi *) __P, (
__v8hi) __A, __M);
1526 return (__m128i)__builtin_convertvector((__v16hi) __A,
__v16qi);
1531 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1538 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__M,
1546 __builtin_ia32_pmovwb256mem_mask ((
__v16qi *) __P, (__v16hi) __A, __M);
1552 __builtin_ia32_pmovswb256mem_mask ((
__v16qi *) __P, (__v16hi) __A, __M);
1558 __builtin_ia32_pmovuswb256mem_mask ((
__v16qi*) __P, (__v16hi) __A, __M);
1563 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1570 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1577 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1584 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1591 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1598 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1605 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1612 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1619 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1626 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1633 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1640 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1647 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1654 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1661 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1668 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1675 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1682 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1689 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1696 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1703 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1710 return (__m128i)__builtin_ia32_selectb_128((
__mmask16)__U,
1717 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1724 return (__m256i)__builtin_ia32_selectb_256((
__mmask32)__U,
1731 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1738 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1745 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1752 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1760 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1768 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1776 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1784 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1793 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1801 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1809 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1817 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1823 #define _mm_mask_shufflehi_epi16(W, U, A, imm) \ 1824 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1825 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1826 (__v8hi)(__m128i)(W)) 1828 #define _mm_maskz_shufflehi_epi16(U, A, imm) \ 1829 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1830 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1831 (__v8hi)_mm_setzero_si128()) 1833 #define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ 1834 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1835 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1836 (__v16hi)(__m256i)(W)) 1838 #define _mm256_maskz_shufflehi_epi16(U, A, imm) \ 1839 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1840 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1841 (__v16hi)_mm256_setzero_si256()) 1843 #define _mm_mask_shufflelo_epi16(W, U, A, imm) \ 1844 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1845 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1846 (__v8hi)(__m128i)(W)) 1848 #define _mm_maskz_shufflelo_epi16(U, A, imm) \ 1849 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1850 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1851 (__v8hi)_mm_setzero_si128()) 1853 #define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ 1854 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1855 (__v16hi)_mm256_shufflelo_epi16((A), \ 1857 (__v16hi)(__m256i)(W)) 1859 #define _mm256_maskz_shufflelo_epi16(U, A, imm) \ 1860 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1861 (__v16hi)_mm256_shufflelo_epi16((A), \ 1863 (__v16hi)_mm256_setzero_si256()) 1868 return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
1874 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1882 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1890 return (__m128i)__builtin_ia32_psllv8hi((
__v8hi)__A, (
__v8hi)__B);
1896 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1904 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1912 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1920 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1928 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1936 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1944 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1952 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
1960 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1968 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1976 return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
1982 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1990 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
1998 return (__m128i)__builtin_ia32_psrlv8hi((
__v8hi)__A, (
__v8hi)__B);
2004 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2012 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2020 return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
2026 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2034 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2042 return (__m128i)__builtin_ia32_psrav8hi((
__v8hi)__A, (
__v8hi)__B);
2048 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2056 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2064 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2072 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2080 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2088 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2096 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2104 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2112 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2120 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2128 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2136 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2144 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2152 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2160 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2168 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__U,
2176 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2184 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__U,
2192 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
2200 return (__m128i) __builtin_ia32_selectw_128 ((
__mmask8) __U,
2208 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
2216 return (__m256i) __builtin_ia32_selectw_256 ((
__mmask16) __U,
2224 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
2232 return (__m128i) __builtin_ia32_selectb_128 ((
__mmask16) __U,
2240 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
2248 return (__m256i) __builtin_ia32_selectb_256 ((
__mmask32) __U,
2257 return (__m128i) __builtin_ia32_selectb_128(__M,
2265 return (__m128i) __builtin_ia32_selectb_128(__M,
2273 return (__m256i) __builtin_ia32_selectb_256(__M,
2281 return (__m256i) __builtin_ia32_selectb_256(__M,
2289 struct __loadu_epi16 {
2292 return ((
struct __loadu_epi16*)__P)->__v;
2298 return (__m128i) __builtin_ia32_loaddquhi128_mask ((
__v8hi *) __P,
2306 return (__m128i) __builtin_ia32_loaddquhi128_mask ((
__v8hi *) __P,
2315 struct __loadu_epi16 {
2318 return ((
struct __loadu_epi16*)__P)->__v;
2324 return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
2332 return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
2341 struct __loadu_epi8 {
2344 return ((
struct __loadu_epi8*)__P)->__v;
2350 return (__m128i) __builtin_ia32_loaddquqi128_mask ((
__v16qi *) __P,
2358 return (__m128i) __builtin_ia32_loaddquqi128_mask ((
__v16qi *) __P,
2367 struct __loadu_epi8 {
2370 return ((
struct __loadu_epi8*)__P)->__v;
2376 return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
2384 return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
2393 struct __storeu_epi16 {
2396 ((
struct __storeu_epi16*)__P)->__v =
__A;
2402 __builtin_ia32_storedquhi128_mask ((
__v8hi *) __P,
2410 struct __storeu_epi16 {
2413 ((
struct __storeu_epi16*)__P)->__v =
__A;
2419 __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
2427 struct __storeu_epi8 {
2430 ((
struct __storeu_epi8*)__P)->__v =
__A;
2436 __builtin_ia32_storedquqi128_mask ((
__v16qi *) __P,
2444 struct __storeu_epi8 {
2447 ((
struct __storeu_epi8*)__P)->__v =
__A;
2453 __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
2574 return (
__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
2586 return (
__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
2592 return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
2598 return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
2604 return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
2610 return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
2616 return (__m128i)__builtin_ia32_selectb_128(__M,
2624 return (__m128i)__builtin_ia32_selectb_128(__M,
2632 return (__m256i)__builtin_ia32_selectb_256(__M,
2640 return (__m256i)__builtin_ia32_selectb_256(__M,
2648 return (__m128i)__builtin_ia32_selectw_128(__M,
2656 return (__m128i)__builtin_ia32_selectw_128(__M,
2664 return (__m256i)__builtin_ia32_selectw_256(__M,
2672 return (__m256i)__builtin_ia32_selectw_256(__M,
2680 return (__m256i) __builtin_ia32_selectw_256 (__M,
2688 return (__m256i) __builtin_ia32_selectw_256(__M,
2696 return (__m128i) __builtin_ia32_selectw_128(__M,
2704 return (__m128i) __builtin_ia32_selectw_128(__M,
2712 return (__m128i)__builtin_ia32_permvarhi128((
__v8hi) __B, (
__v8hi) __A);
2718 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
2727 return (__m128i)__builtin_ia32_selectw_128((
__mmask8)__M,
2735 return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
2742 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
2751 return (__m256i)__builtin_ia32_selectw_256((
__mmask16)__M,
2756 #define _mm_mask_alignr_epi8(W, U, A, B, N) \ 2757 (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 2758 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2759 (__v16qi)(__m128i)(W)) 2761 #define _mm_maskz_alignr_epi8(U, A, B, N) \ 2762 (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 2763 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2764 (__v16qi)_mm_setzero_si128()) 2766 #define _mm256_mask_alignr_epi8(W, U, A, B, N) \ 2767 (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 2768 (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2769 (__v32qi)(__m256i)(W)) 2771 #define _mm256_maskz_alignr_epi8(U, A, B, N) \ 2772 (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 2773 (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2774 (__v32qi)_mm256_setzero_si256()) 2776 #define _mm_dbsad_epu8(A, B, imm) \ 2777 (__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ 2778 (__v16qi)(__m128i)(B), (int)(imm)) 2780 #define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ 2781 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 2782 (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 2783 (__v8hi)(__m128i)(W)) 2785 #define _mm_maskz_dbsad_epu8(U, A, B, imm) \ 2786 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 2787 (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 2788 (__v8hi)_mm_setzero_si128()) 2790 #define _mm256_dbsad_epu8(A, B, imm) \ 2791 (__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ 2792 (__v32qi)(__m256i)(B), (int)(imm)) 2794 #define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ 2795 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 2796 (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 2797 (__v16hi)(__m256i)(W)) 2799 #define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ 2800 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 2801 (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 2802 (__v16hi)_mm256_setzero_si256()) 2804 #undef __DEFAULT_FN_ATTRS128 2805 #undef __DEFAULT_FN_ATTRS256 static __inline__ __m128i __DEFAULT_FN_ATTRS128 
_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi16(__m256i __A, __m256i __B)
#define _mm_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastw_epi16(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
struct __storeu_i16 *__P __v
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
vector signed char unaligned_vec_schar __attribute__((aligned(1)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi8(void const *__P)
__vector signed char __v16qi
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit signed integers...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask(__mmask16 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi8(char __b)
Constructs a 256-bit integer vector of [32 x i8], with each of the 8-bit integral vector elements set...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastb_epi8(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi16(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1, __m128i __V2)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit unsigned integers...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi8(__mmask16 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_test_epi8_mask(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
__inline __m128 const float __Y
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi8(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8(void *__P, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit unsigned integer values in the input and returns the differences in th...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi8(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
__inline __m128 __m64 const * __P
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B)
#define _mm_cmpeq_epi16_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maddubs_epi16(__m128i __a, __m128i __b)
Multiplies corresponding pairs of packed 8-bit unsigned integer values contained in the first source ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi16(__mmask16 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8(__m256i __W, __mmask32 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi16(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, int __B)
#define _mm256_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8(void *__P, __mmask32 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
#define _mm_cmpneq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
__inline __m128d double __X
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi8(__m128i __W, __mmask16 __U, __m128i __A)
#define _mm_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8(__mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8(__m128i __O, __mmask16 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit signed integers...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi16(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi16(__m128i __V)
Zero-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8(void *__P, __mmask16 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
#define _mm256_cmpeq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhrs_epi16(__m128i __a, __m128i __b)
Multiplies packed 16-bit signed integer values, truncates the 32-bit products to the 18 most significant bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu16(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [8 x u16] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
__inline void __m128d __A
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_testn_epi8_mask(__m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi16(short __w)
Constructs a 256-bit integer vector of [16 x i16], with each of the 16-bit integral vector elements set...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8(__m128i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8(__mmask32 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi16(__mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16(__mmask8 __M, short __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8(__mmask32 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16(__mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
__inline __m128d __m128d __B
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi8(__m128i __a)
Computes the absolute value of each of the packed 8-bit signed integers in the source operand and sto...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8(__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask(__mmask16 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi8(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi8(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [16 x i8] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value from...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16(__m256i __W, __mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16(void *__P, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8(__mmask16 __M, char __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a, int __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
__inline __m128 const float const float const float __W
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shuffle_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi8(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
__inline void enum _mm_hint __I
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd_epi16(__m256i __a, __m256i __b)
#define __DEFAULT_FN_ATTRS256
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi16(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16(__mmask16 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi16(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi16(__m128i __A, __m128i __B)
#define _mm_mask_cmpeq_epi16_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epu16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a)
#define _mm256_cmpeq_epi16_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_test_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interl...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit signed integer values in the input and returns the differences in the ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastw_epi16(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi16(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16(__mmask16 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastb_epi8(__m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit unsigned integer...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit unsigned integer values in the input and returns the differences in the...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
#define _mm256_mask_cmpeq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_shuffle_epi8(__m128i __a, __m128i __b)
Copies the 8-bit integers from a 128-bit integer vector to the destination or clears 8-bit values in ...
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi16(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
#define _mm_cmpeq_epi8_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi8(__mmask16 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8(__mmask32 __U, void const *__P)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu8(__mmask32 __M, __m256i __A, __m256i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi16(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu16(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit signed integer values in the input and returns the differences in the c...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi16(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi8(__mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi16(__m128i __a)
Computes the absolute value of each of the packed 16-bit signed integers in the source operand and st...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi8(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16(__mmask8 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8(__mmask16 __M, __m256i __A)
#define _mm256_cmpneq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi16(__m128i __V)
Sign-extends each of the lower eight 8-bit integer elements of a 128-bit vector of [16 x i8] to 16-bi...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi16(__mmask16 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi8(__mmask32 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maddubs_epi16(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask(__mmask32 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors...
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi32(__m256i __V1, __m256i __V2)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi16(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi8(__mmask16 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi8(__m256i __W, __mmask32 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi16(void *__P, __m256i __A)
#define _mm_mask_cmpneq_epi8_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8(__m128i __O, __mmask16 __M, __m256i __A)
#define _mm_cmpneq_epi8_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi16(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b)