13 #ifndef NO_WARN_X86_INTRINSICS 32 #error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." 46 typedef __vector
unsigned long long __v2du;
48 typedef __vector
unsigned int __v4su;
50 typedef __vector
unsigned short __v8hu;
56 typedef long long __m128i
__attribute__ ((__vector_size__ (16), __may_alias__));
57 typedef double __m128d
__attribute__ ((__vector_size__ (16), __may_alias__));
60 typedef long long __m128i_u
__attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
61 typedef double __m128d_u
__attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
64 #define _MM_SHUFFLE2(x,y) (((x) << 1) | (y)) 67 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
70 return __extension__ (__m128d){ __F, 0.0 };
74 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
77 return __extension__ (__m128d){ __F, __F };
80 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
87 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
90 return __extension__ (__m128d){
__X, __W };
94 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
97 return __extension__ (__m128d){
__W, __X };
101 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
109 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
116 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
125 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
132 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
135 return (vec_vsx_ld(0, __P));
139 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
146 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
152 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
159 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
163 return (__m128d)vec_xxpermdi (__tmp, __tmp, 2);
167 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
177 *(__m128d_u *)__P = __A;
181 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
187 extern __inline
double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
193 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
200 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
207 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
213 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
220 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
227 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
228 _mm_cvtsi128_si64 (__m128i __A)
234 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
235 _mm_cvtsi128_si64x (__m128i __A)
240 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
249 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
252 __A[0] = __A[0] + __B[0];
256 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
262 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
265 __A[0] = __A[0] - __B[0];
269 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
275 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
278 __A[0] = __A[0] * __B[0];
282 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
288 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
291 __A[0] = __A[0] / __B[0];
295 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
298 return (vec_sqrt (__A));
302 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
310 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
316 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
326 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
332 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
342 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
348 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
354 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
360 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
366 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
372 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
376 return ((__m128d)
vec_nor (temp, temp));
379 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
385 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
391 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
397 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
403 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
421 return ((__m128d)
vec_and(c, d));
424 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
435 return ((__m128d)vec_orc(c, d));
444 return ((__m128d)
vec_or(c, d));
448 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
464 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
474 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
484 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
494 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
504 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
515 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
526 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
537 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
548 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
559 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
567 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
582 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
585 return (__A[0] == __B[0]);
588 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
591 return (__A[0] < __B[0]);
594 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
597 return (__A[0] <= __B[0]);
600 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
603 return (__A[0] > __B[0]);
606 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
609 return (__A[0] >= __B[0]);
612 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
615 return (__A[0] != __B[0]);
618 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
621 return (__A[0] == __B[0]);
624 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
627 return (__A[0] < __B[0]);
630 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
633 return (__A[0] <= __B[0]);
636 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
639 return (__A[0] > __B[0]);
642 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
645 return (__A[0] >= __B[0]);
648 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
651 return (__A[0] != __B[0]);
655 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
658 return __extension__ (__m128i)(
__v2di){
__q0, __q1 };
661 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
667 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
673 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
677 return __extension__ (__m128i)(
__v8hi){
681 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
687 return __extension__ (__m128i)(
__v16qi){
694 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
700 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
706 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
712 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
715 return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
718 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
721 return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
722 __A, __A, __A, __A, __A, __A, __A, __A);
727 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
733 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
739 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
743 return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
746 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
752 return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
753 __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
757 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
763 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
766 return (__m128i) (vec_vsx_ld(0, (
signed int const *)__P));
769 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
775 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
781 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
787 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
793 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
799 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
805 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
812 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
820 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
823 return __extension__ (__m128i)(
__v4si){ 0, 0, 0, 0 };
827 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
835 return (__m128d)
vec_ctf (val, 0);
839 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
845 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
848 __v2df rounded = vec_rint (__A);
862 temp = vec_mergeo (temp, temp);
863 result = (
__v4si) vec_vpkudum ((__vector
long long) temp,
864 (__vector
long long) vzero);
867 const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
868 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f };
875 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
880 return (
__m64) result[0];
883 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
888 const __v4si vzero = { 0, 0, 0, 0 };
897 temp = vec_mergeo (temp, temp);
898 result = (__v4sf) vec_vpkudum ((__vector
long long) temp,
899 (__vector
long long) vzero);
902 const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
903 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f };
910 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
915 const __v4si vzero = { 0, 0, 0, 0 };
926 temp = vec_mergeo (temp, temp);
927 result = (
__v4si) vec_vpkudum ((__vector
long long) temp,
928 (__vector
long long) vzero);
931 const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b,
932 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f };
937 return ((__m128i)
result);
940 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
945 return (
__m64) result[0];
948 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
955 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
964 result =
vec_ctf ((__vector
signed long long) tmp2, 0);
969 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
975 rounded = vec_rint((__v4sf) __A);
980 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
985 result =
vec_cts ((__v4sf) __A, 0);
989 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
994 return (__m128d) vec_doubleh ((__v4sf)__A);
998 __v4sf a = (__v4sf)__A;
1001 #ifdef __LITTLE_ENDIAN__ 1006 temp = __builtin_vsx_xxsldwi (a, a, 3);
1007 temp = __builtin_vsx_xxsldwi (a, temp, 2);
1024 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1028 int result = ((
__v2df)rounded)[0];
1033 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1034 _mm_cvtsd_si64 (__m128d __A)
1037 long long result = ((
__v2df) rounded)[0];
1043 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1044 _mm_cvtsd_si64x (__m128d __A)
1046 return _mm_cvtsd_si64 ((
__v2df)__A);
1049 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1052 int result = ((
__v2df)__A)[0];
1058 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1059 _mm_cvttsd_si64 (__m128d __A)
1061 long long result = ((
__v2df)__A)[0];
1067 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1068 _mm_cvttsd_si64x (__m128d __A)
1070 return _mm_cvttsd_si64 (__A);
1073 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1076 __v4sf result = (__v4sf)__A;
1078 #ifdef __LITTLE_ENDIAN__ 1084 result = __builtin_vsx_xxsldwi (result, result, 3);
1092 result = __builtin_vsx_xxsldwi (result, temp_s, 1);
1094 result [0] = ((
__v2df)__B)[0];
1099 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1109 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1110 _mm_cvtsi64_sd (__m128d __A,
long long __B)
1119 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1120 _mm_cvtsi64x_sd (__m128d __A,
long long __B)
1122 return _mm_cvtsi64_sd (__A, __B);
1125 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1128 #ifdef __LITTLE_ENDIAN__ 1130 __v4sf temp =
vec_splat ((__v4sf)__B, 0);
1141 res [0] = ((__v4sf)__B) [0];
1142 return (__m128d)
res;
1146 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1155 else if (litmsk == 1)
1156 result = vec_xxpermdi (__B, __A, 2);
1157 else if (litmsk == 2)
1158 result = vec_xxpermdi (__B, __A, 1);
1160 else if (litmsk == 1)
1161 result = vec_xxpermdi (__A, __B, 2);
1162 else if (litmsk == 2)
1163 result = vec_xxpermdi (__A, __B, 1);
1171 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1177 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1183 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1191 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1203 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1206 __vector
unsigned long long result;
1207 static const __vector
unsigned int perm_mask =
1209 #ifdef __LITTLE_ENDIAN__ 1210 0x80800040, 0x80808080, 0x80808080, 0x80808080
1212 0x80808080, 0x80808080, 0x80808080, 0x80804000
1216 result = ((__vector
unsigned long long)
1217 vec_vbpermq ((__vector
unsigned char)
__A,
1218 (__vector
unsigned char) perm_mask));
1220 #ifdef __LITTLE_ENDIAN__ 1228 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1234 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1240 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1246 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1252 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1258 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1264 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1267 return (__m128i)
vec_mergel ((__vector
long long) __A,
1268 (__vector
long long) __B);
1271 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1277 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1283 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1289 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1292 return (__m128i)
vec_mergeh ((__vector
long long) __A,
1293 (__vector
long long) __B);
1296 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1302 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1308 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1314 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1320 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1326 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1332 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1338 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1344 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1350 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1356 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1362 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1368 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1374 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1380 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1386 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1392 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1395 __vector
signed int zero = {0, 0, 0, 0};
1400 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1403 __vector
signed int w0,
w1;
1406 #ifdef __LITTLE_ENDIAN__ 1407 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
1408 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
1410 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15,
1411 0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
1417 return (__m128i)
vec_perm (w0, w1, xform1);
1420 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1426 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1429 unsigned int a =
__A;
1430 unsigned int b =
__B;
1435 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1441 #ifdef __LITTLE_ENDIAN__ 1446 :
"v" (__A),
"v" (__B)
1453 :
"v" (__A),
"v" (__B)
1462 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1466 __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
1468 if (__B >= 0 && __B < 16)
1470 if (__builtin_constant_p(__B))
1481 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1485 __v4si result = { 0, 0, 0, 0 };
1487 if (__B >= 0 && __B < 32)
1489 if (__builtin_constant_p(__B) && __B < 16)
1501 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1505 __v2di result = { 0, 0 };
1507 if (__B >= 0 && __B < 64)
1509 if (__builtin_constant_p(__B) && __B < 16)
1521 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1529 if (__builtin_constant_p(__B))
1539 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1547 if (__builtin_constant_p(__B))
1562 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1566 const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1576 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1580 const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1583 #ifdef __LITTLE_ENDIAN__ 1584 if (__builtin_constant_p(__N))
1593 #ifdef __LITTLE_ENDIAN__ 1605 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1611 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1615 const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1618 #ifdef __LITTLE_ENDIAN__ 1629 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1634 __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
1638 if (__builtin_constant_p(__B))
1649 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1653 __v4si result = { 0, 0, 0, 0 };
1657 if (__builtin_constant_p(__B))
1674 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1678 __v2di result = { 0, 0 };
1682 if (__builtin_constant_p(__B))
1699 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1707 #ifdef __LITTLE_ENDIAN__ 1719 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1723 __vector __bool
int shmask;
1724 const __v4su shmax = { 32, 32, 32, 32 };
1726 #ifdef __LITTLE_ENDIAN__ 1739 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1743 __vector __bool
long long shmask;
1744 const __v2du shmax = { 64, 64 };
1756 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1759 const __v8hu rshmax = { 15, 15, 15, 15, 15, 15, 15, 15 };
1763 #ifdef __LITTLE_ENDIAN__ 1768 rshift =
vec_min (rshift, rshmax);
1774 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1777 const __v4su rshmax = { 31, 31, 31, 31 };
1781 #ifdef __LITTLE_ENDIAN__ 1786 rshift =
vec_min (rshift, rshmax);
1792 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1796 __vector __bool
short shmask;
1797 const __v8hu shmax = { 15, 15, 15, 15, 15, 15, 15, 15 };
1800 #ifdef __LITTLE_ENDIAN__ 1812 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1816 __vector __bool
int shmask;
1817 const __v4su shmax = { 32, 32, 32, 32 };
1820 #ifdef __LITTLE_ENDIAN__ 1833 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1837 __vector __bool
long long shmask;
1838 const __v2du shmax = { 64, 64 };
1850 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1856 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1862 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1868 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1874 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1880 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1886 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1892 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1898 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1904 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1910 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1916 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1922 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1928 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1934 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1940 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1946 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1952 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1955 return (
unsigned short) ((
__v8hi)__A)[__N & 7];
1958 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1963 result [(__N & 7)] = __D;
1968 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1974 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1980 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1986 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1997 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2000 __vector
unsigned long long result;
2001 static const __vector
unsigned char perm_mask =
2003 0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40,
2004 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00
2007 result = ((__vector
unsigned long long)
2008 vec_vbpermq ((__vector
unsigned char)
__A,
2009 (__vector
unsigned char) perm_mask));
2011 #ifdef __LITTLE_ENDIAN__ 2019 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2024 #ifdef __LITTLE_ENDIAN__ 2025 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
2026 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
2028 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15,
2029 0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D
2035 return (__m128i)
vec_perm (w0, w1, xform1);
2038 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2041 unsigned long element_selector_98 = __mask & 0x03;
2047 #ifdef __LITTLE_ENDIAN__ 2048 0x0908, 0x0B0A, 0x0D0C, 0x0F0E
2050 0x0809, 0x0A0B, 0x0C0D, 0x0E0F
2054 #ifdef __LITTLE_ENDIAN__ 2055 { 0x1716151413121110UL, 0UL};
2057 { 0x1011121314151617UL, 0UL};
2062 t.as_short[0] = permute_selectors[element_selector_98];
2066 pmask[1] = t.as_m64;
2068 r =
vec_perm (a, a, (__vector
unsigned char)pmask);
2072 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2075 unsigned long element_selector_10 = __mask & 0x03;
2079 static const unsigned short permute_selectors[4] =
2081 #ifdef __LITTLE_ENDIAN__ 2082 0x0100, 0x0302, 0x0504, 0x0706
2084 0x0001, 0x0203, 0x0405, 0x0607
2088 #ifdef __LITTLE_ENDIAN__ 2089 { 0UL, 0x1f1e1d1c1b1a1918UL};
2091 { 0UL, 0x18191a1b1c1d1e1fUL};
2095 t.as_short[0] = permute_selectors[element_selector_10];
2099 pmask[0] = t.as_m64;
2101 r =
vec_perm (a, a, (__vector
unsigned char)pmask);
2105 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2108 unsigned long element_selector_10 = __mask & 0x03;
2112 static const unsigned int permute_selectors[4] =
2114 #ifdef __LITTLE_ENDIAN__ 2115 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
2117 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
2122 t[0] = permute_selectors[element_selector_10];
2129 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2132 __v2du hibit = { 0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL};
2134 __m128i_u *
p = (__m128i_u*)__C;
2142 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2148 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2155 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2161 const __v4su zero = { 0, 0, 0, 0 };
2168 vabsdiff =
vec_sub (vmax, vmin);
2170 vsum = (__vector
signed int)
vec_sum4s (vabsdiff, zero);
2172 result = vec_sum2s (vsum, (__vector
signed int) zero);
2174 #ifdef __LITTLE_ENDIAN__ 2175 result =
vec_sld (result, result, 4);
2177 result =
vec_sld (result, result, 6);
2183 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2184 _mm_stream_si32 (
int *__A,
int __B)
2196 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2197 _mm_stream_si64 (
long long int *__A,
long long int __B)
2209 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2222 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2232 *(__m128d*)__A = __B;
2235 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2247 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2251 __atomic_thread_fence (__ATOMIC_RELEASE);
2254 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
2258 __atomic_thread_fence (__ATOMIC_SEQ_CST);
2261 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2267 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2268 _mm_cvtsi64_si128 (
long long __A)
2270 return __extension__ (__m128i)(
__v2di){
__A, 0LL };
2274 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2275 _mm_cvtsi64x_si128 (
long long __A)
2277 return __extension__ (__m128i)(
__v2di){
__A, 0LL };
2282 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2285 return (__m128)
__A;
2288 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2291 return (__m128i)
__A;
2294 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2297 return (__m128d)
__A;
2300 extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2303 return (__m128i)
__A;
2306 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2309 return (__m128)
__A;
2312 extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
2315 return (__m128d)
__A;
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static const unsigned short permute_selectors[4]
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed char __a, vector int __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of b...
#define _mm_bslli_si128(a, imm)
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a)
Converts the low-order element of a 128-bit vector of [2 x double] into a 32-bit signed integer value...
static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a, vector short __b)
__inline __m128i int const __D
__inline __m128i char char char char char char char char char char char __q04
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
__inline __m128i char __q14
static __inline__ vector signed char __ATTRS_o_ai vec_ld(int __a, const vector signed char *__b)
unsigned long element_selector_DC
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
Initializes the 16-bit values in a 128-bit vector of [8 x i16] with the specified 16-bit integer valu...
const __v2du double_exp_mask
static __inline__ vector unsigned char __ATTRS_o_ai vec_packsu(vector short __a, vector short __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmple(vector signed char __a, vector signed char __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double]...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 8-bit integral v...
unsigned long element_selector_BA
return vec_perm((__v4sf) __A,(__v4sf) __B,(__vector unsigned char) t)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q)
Initializes both values in a 128-bit vector of [2 x i64] with the specified 64-bit value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 32-bit integral ...
__vector signed char __v16qi
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit signed integers...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
_mm_storeu_si128(p,(__m128i) tmp)
#define _mm_bsrli_si128(a, imm)
return vec_sel(__B, __A, m)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a)
Stores the lower 64 bits of a 128-bit vector of [2 x double] to a memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a)
Casts a 128-bit floating-point vector of [4 x float] into a 128-bit integer vector.
static __inline__ vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a, vector signed char __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w)
Constructs a 128-bit floating-point vector of [2 x double], with each of the two double-precision flo...
__inline __m128i char char char char char char char char char char char char char __q02
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
unsigned long element_selector_FE
__inline __m128i long long __q0
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b)
Compares lower 64-bit double-precision values of both operands, and returns the greater of the pair o...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
__inline __m128i char char char char char char __q09
static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector signed char __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
__inline __m128 const float __Y
static __inline__ vector signed char __ATTRS_o_ai vec_nor(vector signed char __a, vector signed char __b)
#define _mm_shufflehi_epi16(a, imm)
Constructs a 128-bit integer vector by shuffling four upper 16-bit elements of a 128-bit integer vect...
static __inline__ vector signed char __ATTRS_o_ai vec_splat(vector signed char __a, unsigned const int __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, __m64 __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the two 64-bit integer vecto...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp)
Loads a double-precision value into the low-order bits of a 128-bit vector of [2 x double]...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit unsigned integer values in the input and returns the differences in th...
__inline __m128i char char char char __q11
__inline __m128 __m64 const * __P
static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
__inline __m128i short short short short short short short __q7
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding 16-bit values of the 128-bit integer vectors for equality...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 16-bit integral ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Moves bytes selected by the mask from the first operand to the specified unaligned memory location...
__vector unsigned char __v16qu
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a)
Casts a 128-bit integer vector into a 128-bit floating-point vector of [4 x float].
static __inline__ vector signed char __ATTRS_o_ai vec_and(vector signed char __a, vector signed char __b)
#define _mm_extract_epi16(a, imm)
Extracts 16 bits from a 128-bit integer vector of [8 x i16], using the immediate-value parameter as a...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b)
Compares lower 64-bit double-precision values of both operands, and returns the lesser of the pair of...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a)
Moves the 64-bit operand to a 128-bit integer vector, zeroing the upper bits.
static __inline__ vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a, vector unsigned char __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a)
Returns the lower 64 bits of a 128-bit integer vector as a 64-bit integer.
static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a)
Converts the two signed 32-bit integer elements of a 64-bit vector of [2 x i32] into two double-preci...
static __inline__ vector signed char __ATTRS_o_ai vec_sro(vector signed char __a, vector signed char __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp)
Loads a 128-bit floating-point vector of [2 x double] from an unaligned memory location.
__inline __m128d double __X
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
void _mm_mfence(void)
Forces strong memory ordering (serialization) between load and store instructions preceding this inst...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp)
Loads a double-precision floating-point value from a specified memory location and duplicates it to b...
__inline __m128i short short __q5
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a)
Casts a 128-bit floating-point vector of [2 x double] into a 128-bit floating-point vector of [4 x fl...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
__inline __m128i char char char char char char char char __q07
__inline __m128i char char char __q12
static __inline__ vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a, vector unsigned char __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
__inline __m128i char char char char char char char __q08
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
__vector unsigned char xform1
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors...
__asm__("vmuleuw %0,%1,%2" :"=v"(result) :"v"(__A), "v"(__B) :)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a)
Moves packed double-precision values from a 128-bit vector of [2 x double] to a memory location...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit signed integers...
static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp)
Loads a 64-bit double-precision value to the low element of a 128-bit vector of [2 x double] and clears the u...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x)
Constructs a 128-bit floating-point vector of [2 x double], initialized in reverse order with the spe...
#define _mm_shuffle_pd(a, b, i)
Constructs a 128-bit floating-point vector of [2 x double] from two 128-bit vector parameters of [2 x...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a)
Returns a vector of [4 x i32] where the lowest element is the input operand and the remaining element...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
return (__m64) ((__vector long long) c)[0]
static __inline__ vector signed char __ATTRS_o_ai vec_or(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a)
Stores two double-precision values, in reverse order, from a 128-bit vector of [2 x double] to a 16-b...
static __inline__ vector signed char __ATTRS_o_ai vec_andc(vector signed char __a, vector signed char __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a)
Casts a 128-bit floating-point vector of [4 x float] into a 128-bit floating-point vector of [2 x dou...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
__vector unsigned long long __v2du
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b)
Converts the lower double-precision floating-point element of a 128-bit vector of [2 x double]...
static __inline__ vector signed char __ATTRS_o_ai vec_slo(vector signed char __a, vector signed char __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w)
Constructs a 128-bit floating-point vector of [2 x double], with each of the two double-precision flo...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b)
__inline __m128i char char char char char char char char char __q06
__inline void __m128d __A
static __inline__ vector signed char __ATTRS_o_ai vec_sra(vector signed char __a, vector unsigned char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32], truncating the result when it is inexact...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x)
Constructs a 128-bit floating-point vector of [2 x double] initialized with the specified double-prec...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp)
Loads a 128-bit floating-point vector of [2 x double] from an aligned memory location.
unsigned long element_selector_54
static __inline__ vector int __ATTRS_o_ai vec_vmrghw(vector int __a, vector int __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ vector signed char __ATTRS_o_ai vec_max(vector signed char __a, vector signed char __b)
unsigned long element_selector_32
__inline __m128d __m128d __B
static __inline__ vector signed int __ATTRS_o_ai vec_sld(vector signed int, vector signed int, unsigned const int __c)
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
__inline __m128i int int __q1
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding 32-bit values of the 128-bit integer vectors for equality...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a)
Moves the lower 64 bits of a 128-bit integer vector to a 128-bit integer vector, zeroing the upper bi...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b)
Performs a bitwise OR of two 128-bit vectors of [2 x double].
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a)
Stores the lower 64 bits of a 128-bit vector of [2 x double] to a memory location.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
#define vec_ctf(__a, __b)
#define _mm_insert_epi16(a, b, imm)
Constructs a 128-bit integer vector by first making a copy of the 128-bit integer vector parameter...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b)
Calculates the square root of the lower double-precision value of the second operand and returns it i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
Initializes the 32-bit values in a 128-bit vector of [4 x i32] with the specified 32-bit integer valu...
void _mm_clflush(void const *__p)
The cache line containing __p is flushed and invalidated from all caches in the coherency domain...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
static __inline__ vector bool char __ATTRS_o_ai vec_cmpge(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i_u const *__p)
Returns a vector of [2 x i64] where the lower element is taken from the lower element of the operand...
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a)
Stores a 128-bit integer vector to a 128-bit aligned memory location.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value f...
static __inline__ vector short __ATTRS_o_ai vec_unpackh(vector signed char __a)
__inline __m128d __m128d const int __mask
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a)
Stores a 128-bit vector of [2 x double] into an unaligned memory location.
static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a)
Returns the low-order element of a 128-bit vector of [2 x double] as a double-precision floating-poin...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
__inline __m128i char char char char char char char char char char char char char char char __q00
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double].
__inline __m128 const float const float const float __W
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a)
Casts a 128-bit floating-point vector of [2 x double] into a 128-bit integer vector.
#define _mm_shufflelo_epi16(a, imm)
Constructs a 128-bit integer vector by shuffling four lower 16-bit elements of a 128-bit integer vect...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a)
Converts the low-order element of a [2 x double] vector into a 32-bit signed integer value...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, __m128i __a)
Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to a memory location.
__vector __bool short shmask
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a)
Moves the least significant 32 bits of a vector of [4 x i32] to a 32-bit signed integer value...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Initializes the 8-bit values in a 128-bit vector of [16 x i8] with the specified 8-bit integer values...
__inline __m128i char char char char char char char char char char __q05
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a)
Casts a 128-bit integer vector into a 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
static __inline__ vector int __ATTRS_o_ai vec_splat_s32(signed char __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b)
Performs a bitwise XOR of two 128-bit vectors of [2 x double].
static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 64-bit integral ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors...
__inline void __m128i char * __C
__vector unsigned int __v4su
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a)
Stores the upper 64 bits of a 128-bit vector of [2 x double] to a memory location.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b)
Converts a 32-bit signed integer value, in the second parameter, into a double-precision floating-poi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double], using the one's complement of the valu...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
__inline __m128i short short short short __q3
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interl...
static __inline__ void __ATTRS_o_ai vec_st(vector signed char __a, int __b, vector signed char *__c)
static __inline__ vector signed char __ATTRS_o_ai vec_avg(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit signed integer values in the input and returns the differences in the ...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
__inline __m128i char char char char char __q10
__inline __m128i const int __N
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit unsigned integer...
__inline __m128i char char __q13
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit unsigned integer values in the input and returns the differences in the...
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b)
Stores a 128-bit integer vector to a memory location aligned on a 128-bit boundary.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
#define _mm_srli_si128(a, imm)
Right-shifts the 128-bit integer vector operand by the specified number of bytes. ...
static __inline__ vector short __ATTRS_o_ai vec_mule(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
__inline __m128i char char char char char char char char char char char char __q03
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b)
Computes the absolute differences of corresponding 8-bit integer values in two 128-bit vectors...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
__inline __m128i short short short __q4
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
#define _mm_shuffle_epi32(a, imm)
Constructs a 128-bit integer vector by shuffling four 32-bit elements of a 128-bit integer vector par...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of b...
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a)
Stores a 128-bit floating-point vector of [2 x double] to a 128-bit aligned memory location...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of b...
static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp)
Loads a double-precision value into the high-order bits of a 128-bit vector of [2 x double]...
__vector long long __v2di
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
__inline __m128i char char char char char char char char char char char char char char __q01
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] for...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b)
Converts the lower single-precision floating-point element of a 128-bit vector of [4 x float]...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a)
Extracts the sign bits of the double-precision values in the 128-bit vector of [2 x double]...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit signed integer values in the input and returns the differences in the c...
static __inline__ vector signed char __ATTRS_o_ai vec_abs(vector signed char __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
__inline __m128i int __q2
__inline __m128i short __q6
static __inline__ vector short __ATTRS_o_ai vec_unpackl(vector signed char __a)
__vector unsigned short __v8hu
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp)
Loads two double-precision values, in reverse order, from an aligned memory location into a 128-bit v...
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a)
Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to the upper and lower 64 bits of a...
__inline __m128i char char char char char char char char char char char char char char char __q15
static __inline__ vector signed char __ATTRS_o_ai vec_mergeh(vector signed char __a, vector signed char __b)
__inline __m128i const int _imm5
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
static __inline__ vector short __ATTRS_o_ai vec_splat_s16(signed char __a)
#define _mm_slli_si128(a, imm)
Left-shifts the 128-bit integer vector operand by the specified number of bytes.
static __inline__ vector signed char __ATTRS_o_ai vec_min(vector signed char __a, vector signed char __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
void _mm_lfence(void)
Forces strong memory ordering (serialization) between load instructions preceding this instruction an...
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a)
Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to the upper and lower 64 bits of a...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values conta...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
unsigned long element_selector_76