27 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
31 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
35 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
39 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
43 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
47 #if !defined(_MSC_VER) || __has_feature(modules) || \
48 (defined(__SSE4_2__) || defined(__SSE4_1__))
52 #if !defined(_MSC_VER) || __has_feature(modules) || \
53 (defined(__AES__) || defined(__PCLMUL__))
57 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
61 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
65 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
/*
 * _mm256_cvtps_ph(a, imm)
 *
 * Casts `a` to __m256 and then to __v8sf, hands it together with `imm` to
 * __builtin_ia32_vcvtps2ph256, and yields the builtin's result cast to
 * __m128i.
 *
 * NOTE(review): `imm` presumably selects the rounding mode of the underlying
 * VCVTPS2PH instruction and has to be a compile-time constant, which would
 * explain why this is a macro rather than an inline function -- confirm
 * against the instruction reference.
 *
 * Each argument is evaluated exactly once. The statement expression is a GNU
 * extension; __extension__ keeps it quiet under pedantic warnings.
 */
#define _mm256_cvtps_ph(a, imm) __extension__ ({ \
  (__m128i)__builtin_ia32_vcvtps2ph256((__v8sf)(__m256)(a), (imm)); })
110 static __inline __m256
__attribute__((__always_inline__, __nodebug__, __target__(
"f16c")))
111 _mm256_cvtph_ps(__m128i __a)
113 return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
117 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
121 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
125 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
129 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
133 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
137 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
141 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
145 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
149 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
153 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
157 #if !defined(_MSC_VER) || __has_feature(modules) || \
158 (defined(__AVX512VL__) && defined(__AVX512BW__))
162 #if !defined(_MSC_VER) || __has_feature(modules) || \
163 (defined(__AVX512VL__) && defined(__AVX512CD__))
167 #if !defined(_MSC_VER) || __has_feature(modules) || \
168 (defined(__AVX512VL__) && defined(__AVX512DQ__))
172 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
176 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
180 #if !defined(_MSC_VER) || __has_feature(modules) || \
181 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
185 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
189 #if !defined(_MSC_VER) || __has_feature(modules) || \
190 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
194 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
198 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
202 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/*
 * Thin wrapper around __builtin_ia32_rdrand16_step.
 *
 * __p is forwarded unchanged to the builtin and the builtin's int result is
 * returned unchanged.
 *
 * NOTE(review): per the RDRAND instruction description the result is
 * presumably 1 when a random 16-bit value was stored through __p and 0 when
 * no value was available -- confirm before relying on it; callers should
 * check the return value either way.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}
/*
 * Thin wrapper around __builtin_ia32_rdrand32_step.
 *
 * __p is forwarded unchanged to the builtin and the builtin's int result is
 * returned unchanged.
 *
 * NOTE(review): per the RDRAND instruction description the result is
 * presumably 1 when a random 32-bit value was stored through __p and 0 when
 * no value was available -- confirm before relying on it; callers should
 * check the return value either way.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}
/* The 64-bit form of the builtin only exists when targeting x86-64, so the
 * wrapper is not declared for 32-bit builds. */
#ifdef __x86_64__
/*
 * Thin wrapper around __builtin_ia32_rdrand64_step.
 *
 * __p is forwarded unchanged to the builtin and the builtin's int result is
 * returned unchanged.
 *
 * NOTE(review): per the RDRAND instruction description the result is
 * presumably 1 when a random 64-bit value was stored through __p and 0 when
 * no value was available -- confirm before relying on it; callers should
 * check the return value either way.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
#endif /* __x86_64__ */
/*
 * Returns the index of the least significant set bit of __A, computed as the
 * number of trailing zero bits (__builtin_ctz).
 *
 * __A is treated as a 32-bit pattern, so negative values are valid inputs.
 * The result is undefined when __A is 0, because __builtin_ctz is undefined
 * for a zero argument.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A) {
  /* The builtin takes unsigned int; make the conversion explicit. */
  return __builtin_ctz((unsigned int)__A);
}
/*
 * Returns the index of the most significant set bit of __A, computed as
 * 31 minus the number of leading zero bits (__builtin_clz).
 *
 * __A is treated as a 32-bit pattern, so negative values yield 31.
 * The result is undefined when __A is 0, because __builtin_clz is undefined
 * for a zero argument.
 */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A) {
  /* The builtin takes unsigned int; make the conversion explicit. */
  return 31 - __builtin_clz((unsigned int)__A);
}
236 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
/*
 * Returns the unsigned int produced by __builtin_ia32_rdfsbase32.
 *
 * NOTE(review): presumably this is the low 32 bits of the FS segment base
 * address (RDFSBASE) -- confirm against the instruction reference. Compiled
 * for the "fsgsbase" target feature.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}
/*
 * Returns the unsigned long long produced by __builtin_ia32_rdfsbase64.
 *
 * NOTE(review): presumably this is the full 64-bit FS segment base address
 * (RDFSBASE) -- confirm against the instruction reference. Compiled for the
 * "fsgsbase" target feature.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
/*
 * Returns the unsigned int produced by __builtin_ia32_rdgsbase32.
 *
 * NOTE(review): presumably this is the low 32 bits of the GS segment base
 * address (RDGSBASE) -- confirm against the instruction reference. Compiled
 * for the "fsgsbase" target feature.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
/*
 * Returns the unsigned long long produced by __builtin_ia32_rdgsbase64.
 *
 * NOTE(review): presumably this is the full 64-bit GS segment base address
 * (RDGSBASE) -- confirm against the instruction reference. Compiled for the
 * "fsgsbase" target feature.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
/*
 * Passes __V to __builtin_ia32_wrfsbase32; nothing is returned.
 *
 * NOTE(review): presumably this sets the FS segment base address from a
 * 32-bit value (WRFSBASE) -- confirm against the instruction reference.
 * Compiled for the "fsgsbase" target feature.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  /* Plain call statement: ISO C does not permit `return expr;` in a void
   * function, even when the expression itself has type void. */
  __builtin_ia32_wrfsbase32(__V);
}
/*
 * Passes __V to __builtin_ia32_wrfsbase64; nothing is returned.
 *
 * NOTE(review): presumably this sets the FS segment base address from a
 * 64-bit value (WRFSBASE) -- confirm against the instruction reference.
 * Compiled for the "fsgsbase" target feature.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  /* Plain call statement: ISO C does not permit `return expr;` in a void
   * function, even when the expression itself has type void. */
  __builtin_ia32_wrfsbase64(__V);
}
/*
 * Passes __V to __builtin_ia32_wrgsbase32; nothing is returned.
 *
 * NOTE(review): presumably this sets the GS segment base address from a
 * 32-bit value (WRGSBASE) -- confirm against the instruction reference.
 * Compiled for the "fsgsbase" target feature.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  /* Plain call statement: ISO C does not permit `return expr;` in a void
   * function, even when the expression itself has type void. */
  __builtin_ia32_wrgsbase32(__V);
}
/*
 * Passes __V to __builtin_ia32_wrgsbase64; nothing is returned.
 *
 * NOTE(review): presumably this sets the GS segment base address from a
 * 64-bit value (WRGSBASE) -- confirm against the instruction reference.
 * Compiled for the "fsgsbase" target feature.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  /* Plain call statement: ISO C does not permit `return expr;` in a void
   * function, even when the expression itself has type void. */
  __builtin_ia32_wrgsbase64(__V);
}
289 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
294 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
298 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
302 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
306 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
310 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
314 #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
static __inline__ uint32_t volatile uint32_t * __p
static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) _mm256_cvtph_ps(__m128i __a)
Converts a 128-bit vector containing 16-bit half-precision float values into a 256-bit vector of [8 x float].