13 #ifndef NO_WARN_X86_INTRINSICS 31 #error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." 34 #ifndef _XMMINTRIN_H_INCLUDED 35 #define _XMMINTRIN_H_INCLUDED 38 #define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) 45 #if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \ 46 (defined(__STDC_VERSION__) && \ 47 __STDC_VERSION__ >= 201112L)) 58 #include <mm_malloc.h> 63 typedef float __m128
__attribute__ ((__vector_size__ (16), __may_alias__));
66 typedef float __m128_u
__attribute__ ((__vector_size__ (16), __may_alias__,
73 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
81 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
84 return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
88 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
91 return ((__m128)
vec_ld(0, (__v4sf*)__P));
95 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
98 return (vec_vsx_ld(0, __P));
102 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
108 { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
109 0x17, 0x10, 0x11, 0x12, 0x13 };
111 __tmp =
vec_ld (0, (__v4sf *) __P);
112 result = (__m128)
vec_perm (__tmp, __tmp, permute_vector);
117 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
120 return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
123 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
130 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
133 return __extension__ (__m128)(__v4sf){
__W,
__X,
__Y, __Z };
137 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
140 return __extension__ (__m128)(__v4sf){ __Z,
__Y,
__X, __W };
144 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
147 vec_st((__v4sf)__A, 0, (__v4sf*)__P);
151 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
154 *(__m128_u *)__P = __A;
158 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
163 { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16,
164 0x17, 0x10, 0x11, 0x12, 0x13 };
166 __tmp = (__m128)
vec_perm (__A, __A, permute_vector);
172 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
179 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
186 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
189 return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f };
193 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
196 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
198 return (
vec_sel ((__v4sf)__A, (__v4sf)__B, mask));
202 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
209 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
212 *__P = ((__v4sf)__A)[0];
219 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
224 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
234 return (
vec_sel (__A, c, mask));
236 __A[0] = __A[0] + __B[0];
241 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
246 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
256 return (
vec_sel (__A, c, mask));
258 __A[0] = __A[0] - __B[0];
263 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
268 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
278 return (
vec_sel (__A, c, mask));
280 __A[0] = __A[0] * __B[0];
285 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
290 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
300 return (
vec_sel (__A, c, mask));
302 __A[0] = __A[0] / __B[0];
307 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
311 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
320 return (
vec_sel (__A, c, mask));
324 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
327 return (__m128) ((__v4sf)__A + (__v4sf)
__B);
330 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
333 return (__m128) ((__v4sf)__A - (__v4sf)
__B);
336 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
339 return (__m128) ((__v4sf)__A * (__v4sf)
__B);
342 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
345 return (__m128) ((__v4sf)__A / (__v4sf)
__B);
348 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
351 return (vec_sqrt ((__v4sf)__A));
354 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
357 return (
vec_re ((__v4sf)__A));
360 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
366 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
370 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
379 return (
vec_sel (__A, c, mask));
382 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
386 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
395 return (
vec_sel (__A, c, mask));
398 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
402 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
412 return (
vec_sel ((__v4sf)__A, c, mask));
415 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
419 static const __vector
unsigned int mask = {0xffffffff, 0, 0, 0};
429 return (
vec_sel ((__v4sf)__A, c, mask));
432 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
435 __vector __bool
int m =
vec_cmpgt ((__v4sf) __B, (__v4sf) __A);
439 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
442 __vector __bool
int m =
vec_cmpgt ((__v4sf) __A, (__v4sf) __B);
447 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
450 return ((__m128)
vec_and ((__v4sf)__A, (__v4sf)__B));
454 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
457 return ((__m128)
vec_andc ((__v4sf)__B, (__v4sf)__A));
460 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
463 return ((__m128)
vec_or ((__v4sf)__A, (__v4sf)__B));
466 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
469 return ((__m128)
vec_xor ((__v4sf)__A, (__v4sf)__B));
475 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
478 return ((__m128)
vec_cmpeq ((__v4sf)__A,(__v4sf) __B));
481 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
484 return ((__m128)
vec_cmplt ((__v4sf)__A, (__v4sf)__B));
487 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
490 return ((__m128)
vec_cmple ((__v4sf)__A, (__v4sf)__B));
493 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
496 return ((__m128)
vec_cmpgt ((__v4sf)__A, (__v4sf)__B));
499 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
502 return ((__m128)
vec_cmpge ((__v4sf)__A, (__v4sf)__B));
505 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
508 __v4sf temp = (__v4sf )
vec_cmpeq ((__v4sf)
__A, (__v4sf)__B);
509 return ((__m128)
vec_nor (temp, temp));
512 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
515 return ((__m128)
vec_cmpge ((__v4sf)__A, (__v4sf)__B));
518 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
521 return ((__m128)
vec_cmpgt ((__v4sf)__A, (__v4sf)__B));
524 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
527 return ((__m128)
vec_cmple ((__v4sf)__A, (__v4sf)__B));
530 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
533 return ((__m128)
vec_cmplt ((__v4sf)__A, (__v4sf)__B));
536 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
539 __vector
unsigned int a,
b;
540 __vector
unsigned int c,
d;
542 { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
544 a = (__vector
unsigned int)
vec_abs ((__v4sf)
__A);
545 b = (__vector
unsigned int)
vec_abs ((__v4sf)
__B);
546 c = (__vector
unsigned int)
vec_cmpgt (float_exp_mask, a);
547 d = (__vector
unsigned int)
vec_cmpgt (float_exp_mask, b);
548 return ((__m128 )
vec_and (c, d));
551 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
554 __vector
unsigned int a,
b;
555 __vector
unsigned int c,
d;
557 { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
559 a = (__vector
unsigned int)
vec_abs ((__v4sf)
__A);
560 b = (__vector
unsigned int)
vec_abs ((__v4sf)
__B);
561 c = (__vector
unsigned int)
vec_cmpgt (a, float_exp_mask);
562 d = (__vector
unsigned int)
vec_cmpgt (b, float_exp_mask);
563 return ((__m128 )
vec_or (c, d));
569 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
572 static const __vector
unsigned int mask =
573 { 0xffffffff, 0, 0, 0 };
584 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
587 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
590 static const __vector
unsigned int mask =
591 { 0xffffffff, 0, 0, 0 };
602 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
605 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
608 static const __vector
unsigned int mask =
609 { 0xffffffff, 0, 0, 0 };
620 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
623 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
626 static const __vector
unsigned int mask =
627 { 0xffffffff, 0, 0, 0 };
638 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
641 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
644 static const __vector
unsigned int mask =
645 { 0xffffffff, 0, 0, 0 };
656 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
659 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
662 static const __vector
unsigned int mask =
663 { 0xffffffff, 0, 0, 0 };
675 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
678 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
681 static const __vector
unsigned int mask =
682 { 0xffffffff, 0, 0, 0 };
693 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
696 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
699 static const __vector
unsigned int mask =
700 { 0xffffffff, 0, 0, 0 };
711 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
714 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
717 static const __vector
unsigned int mask =
718 { 0xffffffff, 0, 0, 0 };
729 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
732 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
735 static const __vector
unsigned int mask =
736 { 0xffffffff, 0, 0, 0 };
747 return ((__m128)
vec_sel ((__v4sf)__A, c, mask));
750 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
753 __vector
unsigned int a,
b;
754 __vector
unsigned int c,
d;
756 { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
757 static const __vector
unsigned int mask =
758 { 0xffffffff, 0, 0, 0 };
760 a = (__vector
unsigned int)
vec_abs ((__v4sf)
__A);
761 b = (__vector
unsigned int)
vec_abs ((__v4sf)
__B);
762 c = (__vector
unsigned int)
vec_cmpgt (float_exp_mask, a);
763 d = (__vector
unsigned int)
vec_cmpgt (float_exp_mask, b);
767 return ((__m128)
vec_sel ((__v4sf)__A, (__v4sf)c, mask));
770 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
773 __vector
unsigned int a,
b;
774 __vector
unsigned int c,
d;
776 { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 };
777 static const __vector
unsigned int mask =
778 { 0xffffffff, 0, 0, 0 };
780 a = (__vector
unsigned int)
vec_abs ((__v4sf)
__A);
781 b = (__vector
unsigned int)
vec_abs ((__v4sf)
__B);
782 c = (__vector
unsigned int)
vec_cmpgt (a, float_exp_mask);
783 d = (__vector
unsigned int)
vec_cmpgt (b, float_exp_mask);
787 return ((__m128)
vec_sel ((__v4sf)__A, (__v4sf)c, mask));
792 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
795 return (__A[0] == __B[0]);
798 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
801 return (__A[0] < __B[0]);
804 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
807 return (__A[0] <= __B[0]);
810 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
813 return (__A[0] > __B[0]);
816 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
819 return (__A[0] >= __B[0]);
822 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
825 return (__A[0] != __B[0]);
836 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
839 return (__A[0] == __B[0]);
842 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
845 return (__A[0] < __B[0]);
848 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
851 return (__A[0] <= __B[0]);
854 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
857 return (__A[0] > __B[0]);
860 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
863 return (__A[0] >= __B[0]);
866 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
869 return (__A[0] != __B[0]);
872 extern __inline
float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
875 return ((__v4sf)__A)[0];
880 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
887 #ifdef __LITTLE_ENDIAN__
888 "xxsldwi %x0,%x0,%x0,3;\n" 890 "xscvspdp %x2,%x0;\n" 898 res = __builtin_rint(__A[0]);
903 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
913 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
914 _mm_cvtss_si64 (__m128 __A)
920 #ifdef __LITTLE_ENDIAN__
921 "xxsldwi %x0,%x0,%x0,3;\n" 923 "xscvspdp %x2,%x0;\n" 931 res = __builtin_llrint(__A[0]);
937 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
938 _mm_cvtss_si64x (__m128 __A)
940 return _mm_cvtss_si64 ((__v4sf) __A);
957 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
966 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
970 __v4sf temp, rounded;
971 __vector
unsigned long long result;
975 rounded = vec_rint(temp);
976 result = (__vector
unsigned long long)
vec_cts (rounded, 0);
978 return (
__m64) ((__vector
long long) result)[0];
981 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
988 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
997 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1004 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1005 _mm_cvttss_si64 (__m128 __A)
1008 float temp = __A[0];
1014 extern __inline
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1015 _mm_cvttss_si64x (__m128 __A)
1018 float temp = __A[0];
1025 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1029 __vector
unsigned long long result;
1032 temp = (__v4sf)
vec_splat ((__vector
long long)
__A, 0);
1033 result = (__vector
unsigned long long)
vec_cts (temp, 0);
1035 return (
__m64) ((__vector
long long) result)[0];
1038 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1045 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1054 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1062 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1063 _mm_cvtsi64_ss (__m128 __A,
long long __B)
1072 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1073 _mm_cvtsi64x_ss (__m128 __A,
long long __B)
1075 return _mm_cvtsi64_ss (__A, __B);
1080 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1083 __vector
signed int vm1;
1086 vm1 = (__vector
signed int) (__vector
unsigned long long) {
__B, __B};
1087 vf1 = (__vector float)
vec_ctf (vm1, 0);
1089 return ((__m128) (__vector
unsigned long long)
1090 { ((__vector
unsigned long long)vf1) [0],
1091 ((__vector
unsigned long long)__A) [1]});
1094 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1101 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1104 __vector
signed short vs8;
1105 __vector
signed int vi4;
1108 vs8 = (__vector
signed short) (__vector
unsigned long long) {
__A, __A };
1110 vf1 = (__vector float)
vec_ctf (vi4, 0);
1112 return (__m128)
vf1;
1116 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1119 const __vector
unsigned short zero =
1120 { 0, 0, 0, 0, 0, 0, 0, 0 };
1121 __vector
unsigned short vs8;
1122 __vector
unsigned int vi4;
1125 vs8 = (__vector
unsigned short) (__vector
unsigned long long) {
__A, __A };
1127 #ifdef __LITTLE_ENDIAN__
1132 vf1 = (__vector float)
vec_ctf (vi4, 0);
1134 return (__m128)
vf1;
1138 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1141 __vector
signed char vc16;
1142 __vector
signed short vs8;
1143 __vector
signed int vi4;
1146 vc16 = (__vector
signed char) (__vector
unsigned long long) {
__A, __A };
1149 vf1 = (__vector float)
vec_ctf (vi4, 0);
1151 return (__m128)
vf1;
1155 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1159 const __vector
unsigned char zero =
1160 { 0, 0, 0, 0, 0, 0, 0, 0 };
1161 __vector
unsigned char vc16;
1162 __vector
unsigned short vs8;
1163 __vector
unsigned int vi4;
1166 vc16 = (__vector
unsigned char) (__vector
unsigned long long) {
__A, __A };
1167 #ifdef __LITTLE_ENDIAN__ 1168 vs8 = (__vector
unsigned short)
vec_mergel (vc16, zero);
1169 vi4 = (__vector
unsigned int)
vec_mergeh (vs8,
1170 (__vector
unsigned short)
zero);
1172 vs8 = (__vector
unsigned short)
vec_mergel (zero, vc16);
1173 vi4 = (__vector
unsigned int)
vec_mergeh ((__vector
unsigned short)
zero,
1176 vf1 = (__vector float)
vec_ctf (vi4, 0);
1178 return (__m128)
vf1;
1182 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1185 __vector
signed int vi4;
1188 vi4 = (__vector
signed int) (__vector
unsigned long long) {
__A, __B };
1189 vf4 = (__vector float)
vec_ctf (vi4, 0);
1190 return (__m128)
vf4;
1194 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1198 __vector
signed int temp;
1199 __vector
unsigned long long result;
1201 rounded = vec_rint(__A);
1203 result = (__vector
unsigned long long)
vec_pack (temp, temp);
1205 return (
__m64) ((__vector
long long) result)[0];
1209 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1213 __vector
signed int tmp_i;
1214 static const __vector
signed int zero = {0, 0, 0, 0};
1215 __vector
signed short tmp_s;
1216 __vector
signed char res_v;
1218 rounded = vec_rint(__A);
1222 return (
__m64) ((__vector
long long) res_v)[0];
1226 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1230 unsigned long element_selector_10 = __mask & 0x03;
1236 #ifdef __LITTLE_ENDIAN__ 1237 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
1239 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F
1242 __vector
unsigned int t;
1244 t[0] = permute_selectors[element_selector_10];
1248 return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector
unsigned char)t);
1252 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1255 return (__m128)
vec_vmrglw ((__v4sf) __A, (__v4sf)__B);
1259 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1262 return (__m128)
vec_vmrghw ((__v4sf) __A, (__v4sf)__B);
1267 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1270 __vector
unsigned long long __a = (__vector
unsigned long long)__A;
1278 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1281 __vector
unsigned long long __a = (__vector
unsigned long long) __A;
1287 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1290 return (__m128)
vec_mergel ((__vector
unsigned long long)__B,
1291 (__vector
unsigned long long)__A);
1295 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1298 return (__m128)
vec_mergeh ((__vector
unsigned long long)__A,
1299 (__vector
unsigned long long)__B);
1304 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1307 __vector
unsigned long long __a = (__vector
unsigned long long)__A;
1308 __vector
unsigned long long __p =
vec_splats(*__P);
1315 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1318 __vector
unsigned long long __a = (__vector
unsigned long long) __A;
1327 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1330 __vector
unsigned long long result;
1331 static const __vector
unsigned int perm_mask =
1333 #ifdef __LITTLE_ENDIAN__ 1334 0x00204060, 0x80808080, 0x80808080, 0x80808080
1336 0x80808080, 0x80808080, 0x80808080, 0x00204060
1340 result = ((__vector
unsigned long long)
1341 vec_vbpermq ((__vector
unsigned char)
__A,
1342 (__vector
unsigned char) perm_mask));
1344 #ifdef __LITTLE_ENDIAN__ 1353 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1359 extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
1366 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1369 unsigned int shiftr = __N & 3;
1370 #ifdef __BIG_ENDIAN__ 1371 shiftr = 3 - shiftr;
1374 return ((__A >> (shiftr * 16)) & 0xffff);
1377 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1385 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1388 const int shiftl = (__N & 3) * 16;
1390 const __m64 mask = 0xffffUL << shiftl;
1396 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1403 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1408 __vector
signed short a,
b,
r;
1409 __vector __bool
short c;
1413 c = (__vector __bool short)
vec_cmpgt (a, b);
1415 return (
__m64) ((__vector
long long) r)[0];
1423 (m1.as_short[0] > m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
1425 (m1.as_short[1] > m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
1427 (m1.as_short[2] > m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
1429 (m1.as_short[3] > m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
1431 return (
__m64) res.as_m64;
1435 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1442 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1446 __vector
unsigned char a,
b,
r;
1447 __vector __bool
char c;
1449 a = (__vector
unsigned char)
vec_splats (__A);
1450 b = (__vector
unsigned char)
vec_splats (__B);
1451 c = (__vector __bool char)
vec_cmpgt (a, b);
1453 return (
__m64) ((__vector
long long) r)[0];
1462 for (i = 0; i < 8; i++)
1464 ((
unsigned char) m1.as_char[
i] > (
unsigned char) m2.as_char[i]) ?
1465 m1.as_char[
i] : m2.as_char[
i];
1467 return (
__m64) res.as_m64;
1471 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1478 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1482 __vector
signed short a,
b,
r;
1483 __vector __bool
short c;
1487 c = (__vector __bool short)
vec_cmplt (a, b);
1489 return (
__m64) ((__vector
long long) r)[0];
1497 (m1.as_short[0] < m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0];
1499 (m1.as_short[1] < m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1];
1501 (m1.as_short[2] < m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2];
1503 (m1.as_short[3] < m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3];
1505 return (
__m64) res.as_m64;
1509 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1516 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1520 __vector
unsigned char a,
b,
r;
1521 __vector __bool
char c;
1523 a = (__vector
unsigned char)
vec_splats (__A);
1524 b = (__vector
unsigned char)
vec_splats (__B);
1525 c = (__vector __bool char)
vec_cmplt (a, b);
1527 return (
__m64) ((__vector
long long) r)[0];
1536 for (i = 0; i < 8; i++)
1538 ((
unsigned char) m1.as_char[
i] < (
unsigned char) m2.as_char[i]) ?
1539 m1.as_char[
i] : m2.as_char[
i];
1541 return (
__m64) res.as_m64;
1545 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1552 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1555 unsigned long long p =
1556 #ifdef __LITTLE_ENDIAN__ 1557 0x0008101820283038UL;
1559 0x3830282018100800UL;
1561 return __builtin_bpermd (p, __A);
1564 extern __inline
int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1572 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1575 __vector
unsigned short a,
b;
1576 __vector
unsigned short c;
1579 #ifdef __LITTLE_ENDIAN__ 1580 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17,
1581 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
1583 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15,
1584 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15
1588 a = (__vector
unsigned short)
vec_splats (__A);
1589 b = (__vector
unsigned short)
vec_splats (__B);
1591 w0 = vec_vmuleuh (a, b);
1592 w1 = vec_vmulouh (a, b);
1593 c = (__vector
unsigned short)
vec_perm (w0, w1, xform1);
1595 return (
__m64) ((__vector
long long) c)[0];
1598 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1606 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1609 unsigned long element_selector_10 = __N & 0x03;
1610 unsigned long element_selector_32 = (__N >> 2) & 0x03;
1611 unsigned long element_selector_54 = (__N >> 4) & 0x03;
1612 unsigned long element_selector_76 = (__N >> 6) & 0x03;
1613 static const unsigned short permute_selectors[4] =
1615 #ifdef __LITTLE_ENDIAN__ 1616 0x0908, 0x0B0A, 0x0D0C, 0x0F0E
1618 0x0607, 0x0405, 0x0203, 0x0001
1622 __vector
unsigned long long a,
p,
r;
1624 #ifdef __LITTLE_ENDIAN__ 1625 t.as_short[0] = permute_selectors[element_selector_10];
1630 t.as_short[3] = permute_selectors[element_selector_10];
1637 r =
vec_perm (a, a, (__vector
unsigned char)p);
1638 return (
__m64) ((__vector
long long) r)[0];
1641 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1650 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1653 __m64 hibit = 0x8080808080808080UL;
1659 tmp = (tmp & (~mask)) | (__A &
mask);
1663 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1670 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1673 __vector
unsigned char a,
b,
c;
1675 a = (__vector
unsigned char)
vec_splats (__A);
1676 b = (__vector
unsigned char)
vec_splats (__B);
1678 return (
__m64) ((__vector
long long) c)[0];
1681 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1688 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1691 __vector
unsigned short a,
b,
c;
1693 a = (__vector
unsigned short)
vec_splats (__A);
1694 b = (__vector
unsigned short)
vec_splats (__B);
1696 return (
__m64) ((__vector
long long) c)[0];
1699 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1708 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1711 __vector
unsigned char a,
b;
1718 a = (__vector
unsigned char) (__vector
unsigned long long) { 0UL, __A };
1719 b = (__vector
unsigned char) (__vector
unsigned long long) { 0UL, __B };
1722 vabsdiff =
vec_sub (vmax, vmin);
1724 vsum = (__vector
signed int)
vec_sum4s (vabsdiff, zero);
1726 vsum = vec_sums (vsum, (__vector
signed int) zero);
1729 result.as_short[0] = vsum[3];
1730 return result.as_m64;
1733 extern __inline
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1740 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1754 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1769 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1773 __atomic_thread_fence (__ATOMIC_RELEASE);
1781 extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1797 unsigned long __PPR;
1813 __atomic_thread_fence (__ATOMIC_SEQ_CST);
1818 #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ 1820 __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \ 1821 __v4sf __t0 = vec_vmrghw (__r0, __r1); \ 1822 __v4sf __t1 = vec_vmrghw (__r2, __r3); \ 1823 __v4sf __t2 = vec_vmrglw (__r0, __r1); \ 1824 __v4sf __t3 = vec_vmrglw (__r2, __r3); \ 1825 (row0) = (__v4sf)vec_mergeh ((__vector long long)__t0, \ 1826 (__vector long long)__t1); \ 1827 (row1) = (__v4sf)vec_mergel ((__vector long long)__t0, \ 1828 (__vector long long)__t1); \ 1829 (row2) = (__v4sf)vec_mergeh ((__vector long long)__t2, \ 1830 (__vector long long)__t3); \ 1831 (row3) = (__v4sf)vec_mergel ((__vector long long)__t2, \ 1832 (__vector long long)__t3); \ static __inline__ vector int __ATTRS_o_ai vec_vupkhsh(vector short __a)
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ss(float *__p, __m128 __a)
Stores the lower 32 bits of a 128-bit vector of [4 x float] to a memory location. ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
__inline __m128 __m128 __B
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sad_pu8(__m64 __a, __m64 __b)
Subtracts the corresponding 8-bit unsigned integer values of the two 64-bit vector operands and compu...
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_ps(__m128 __a, __m64 __b)
Converts two elements of a 64-bit vector of [2 x i32] into two floating point values and writes them ...
static const __vector unsigned char permute_vector
static __inline__ vector int __ATTRS_o_ai vec_sum4s(vector signed char __a, vector int __b)
static __inline__ vector int __ATTRS_o_ai vec_vupklsh(vector short __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands and returns the lesser value ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadh_pi(__m128 __a, const __m64 *__p)
Loads two packed float values from the address __p into the high-order bits of a 128-bit vector of [4...
static __inline__ vector signed char __ATTRS_o_ai vec_ld(int __a, const vector signed char *__b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands for equality and returns the ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi32_ss(__m128 __a, int __b)
Converts a 32-bit signed integer value into a floating point value and writes it to the lower 32 bits...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
static __inline__ vector bool char __ATTRS_o_ai vec_cmple(vector signed char __a, vector signed char __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
return vec_perm((__v4sf) __A,(__v4sf) __B,(__vector unsigned char) t)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a)
Calculates the approximate reciprocal of the square root of the value stored in the low-order bits of...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pu8(__m64 __a, __m64 __b)
Compares each of the corresponding packed 8-bit unsigned integer values of the 64-bit integer vectors...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps1(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision flo...
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_ps(float *__p, __m128 __a)
Stores the lower 32 bits of a 128-bit vector of [4 x float] into four contiguous elements in an align...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load1_ps(const float *__p)
Loads a 32-bit float value and duplicates it to all four vector elements of a 128-bit vector of [4 x ...
__inline __m64 int const __D
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition. ...
float __m128 __attribute__((__vector_size__(16), __may_alias__))
return vec_sel(__B, __A, m)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
static __inline__ vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a, vector signed char __b)
void _mm_pause(void)
Indicates that a spin loop is being executed for the purposes of optimizing power consumption during ...
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ps(float *__p, __m128 __a)
Moves packed float values from a 128-bit vector of [4 x float] to a 128-bit aligned memory location...
#define _mm_prefetch(a, sel)
Loads one cache line of data from the specified address to a location closer to the processor...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehl_ps(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
const __vector unsigned int zero
__inline __m128 __m128 int const __mask
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pi(__m64 *__p, __m128 __a)
Stores the upper 64 bits of a 128-bit vector of [4 x float] to a memory location. ...
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps1(float *__p, __m128 __a)
Stores the lower 32 bits of a 128-bit vector of [4 x float] into four contiguous elements in an align...
__inline int int const __N
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
__inline __m128 const float __Y
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadl_pi(__m128 __a, const __m64 *__p)
Loads two packed float values from the address __p into the low-order bits of a 128-bit vector of [4 ...
static __inline__ vector signed char __ATTRS_o_ai vec_nor(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_splat(vector signed char __a, unsigned const int __b)
static __inline__ int __DEFAULT_FN_ATTRS_MMX _mm_movemask_pi8(__m64 __a)
Takes the most significant bit from each 8-bit element in a 64-bit integer vector to create an 8-bit ...
__vector unsigned long long __p
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
__inline __m128 __m64 const * __P
static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu16(__m64 __a, __m64 __b)
Computes the rounded averages of the packed unsigned 16-bit integer values and writes the averages to...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands and returns the greater value...
static __inline__ vector int __ATTRS_o_ai vec_vmrglw(vector int __a, vector int __b)
static const unsigned int permute_selectors[4]
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi8_ps(__m64 __a)
Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] into a 128-bit vector of [4 x f...
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtt_ss2si(__m128 __a)
Converts a float value contained in the lower 32 bits of a vector of [4 x float] into a 32-bit intege...
#define _mm_shuffle_pi16(a, n)
Shuffles the 4 16-bit integers from a 64-bit integer vector to the destination, as specified by the i...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
static __inline__ vector signed char __ATTRS_o_ai vec_and(vector signed char __a, vector signed char __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set1_ps(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision flo...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline__ vector unsigned char __ATTRS_o_ai vec_xor(vector unsigned char __a, vector unsigned char __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_ps(float *__p, __m128 __a)
Stores float values from a 128-bit vector of [4 x float] to an aligned memory location in reverse ord...
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_ps(float *__p, __m128 __a)
Stores a 128-bit vector of [4 x float] to an unaligned memory location.
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvt_pi2ps(__m128 __a, __m64 __b)
Converts two elements of a 64-bit vector of [2 x i32] into two floating point values and writes them ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ss(const float *__p)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
static __inline__ vector float __ATTRS_o_ai vec_rsqrte(vector float __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] for ineq...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pi16(__m64 __a, __m64 __b)
Compares each of the corresponding packed 16-bit integer values of the 64-bit integer vectors...
static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtss_f32(__m128 __a)
Extracts a float value contained in the lower 32 bits of a vector of [4 x float]. ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
__asm__("vmuleuw %0,%1,%2" :"=v"(result) :"v"(__A), "v"(__B) :)
__vector unsigned char vabsdiff
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_ps(__m128 __a)
Extracts the sign bits from each single-precision floating-point element of a 128-bit floating-point ...
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhi_pu16(__m64 __a, __m64 __b)
Multiplies packed 16-bit unsigned integer values and writes the high-order 16 bits of each 32-bit pro...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
return (__m64) ((__vector long long) c)[0]
static __inline__ vector signed char __ATTRS_o_ai vec_or(vector signed char __a, vector signed char __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a)
Calculates the approximate reciprocals of the values stored in a 128-bit vector of [4 x float]...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
static __inline__ vector signed char __ATTRS_o_ai vec_andc(vector signed char __a, vector signed char __b)
static const __vector unsigned int float_exp_mask
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu8(__m64 __a, __m64 __b)
Computes the rounded averages of the packed unsigned 8-bit integer values and writes the averages to ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
unsigned long element_selector_54
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi32(__m128 __a)
Converts two low-order float values in a 128-bit vector of [4 x float] into a 64-bit vector of [2 x i...
static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a, vector signed char __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a)
static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b)
__vector unsigned char vmax
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movelh_ps(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
static __inline__ vector int __ATTRS_o_ai vec_vmrghw(vector int __a, vector int __b)
static __inline__ vector signed char __ATTRS_o_ai vec_max(vector signed char __a, vector signed char __b)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
static const __vector unsigned int mask
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a)
Calculates the square root of the value stored in the low-order bits of a 128-bit vector of [4 x floa...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ps(__m128 __a)
Calculates the approximate reciprocals of the square roots of the values stored in a 128-bit vector o...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ss(float __w)
Constructs a 128-bit floating-point vector of [4 x float].
#define vec_ctf(__a, __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands for equality and returns the ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadr_ps(const float *__p)
Loads four packed float values, in reverse order, from an aligned memory location to 32-bit elements ...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtt_ps2pi(__m128 __a)
Converts two low-order float values in a 128-bit vector of [4 x float] into a 64-bit vector of [2 x i...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi8(__m128 __a)
Converts each single-precision floating-point element of a 128-bit floating-point vector of [4 x floa...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvt_si2ss(__m128 __a, int __b)
Converts a 32-bit signed integer value into a floating point value and writes it to the lower 32 bits...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps(float __z, float __y, float __x, float __w)
Constructs a 128-bit floating-point vector of [4 x float] initialized with the specified single-preci...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a)
Calculates the approximate reciprocal of the value stored in the low-order bits of a 128-bit vector o...
static __inline__ vector bool char __ATTRS_o_ai vec_cmpge(vector signed char __a, vector signed char __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] for equa...
#define _mm_insert_pi16(a, d, n)
Copies data from the 64-bit vector of [4 x i16] to the destination, and inserts the lower 16-bits of ...
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
Converts the two 32-bit signed integer values from each 64-bit vector operand of [2 x i32] into a 128...
__inline __m128 const float const float const float __W
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu16_ps(__m64 __a)
Converts a 64-bit vector of 16-bit unsigned integer values into a 128-bit vector of [4 x float]...
void _mm_sfence(void)
Forces strong memory ordering (serialization) between store instructions preceding this instruction a...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
__vector unsigned long long r
__inline void enum _mm_hint __I
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_si32(__m128 __a)
Converts a float value contained in the lower 32 bits of a vector of [4 x float] into a 32-bit intege...
__inline __m128 const float const float __X
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_andnot_ps(__m128 __a, __m128 __b)
Performs a bitwise AND of two 128-bit vectors of [4 x float], using the one's complement of the value...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttps_pi32(__m128 __a)
Converts two low-order float values in a 128-bit vector of [4 x float] into a 64-bit vector of [2 x i...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setr_ps(float __z, float __y, float __x, float __w)
Constructs a 128-bit floating-point vector of [4 x float], initialized in reverse order with the spec...
static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b)
__vector unsigned char xform1
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
unsigned long element_selector_76
static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
Conditionally copies the values from each 8-bit element in the first 64-bit integer vector operand to...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
static __inline__ vector short __ATTRS_o_ai vec_vupkhsb(vector signed char __a)
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
__vector unsigned long long p
static __inline__ void __ATTRS_o_ai vec_st(vector signed char __a, int __b, vector signed char *__c)
static __inline__ vector signed char __ATTRS_o_ai vec_avg(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_pack(vector signed short __a, vector signed short __b)
static __inline__ vector float __ATTRS_o_ai vec_re(vector float __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_or_ps(__m128 __a, __m128 __b)
Performs a bitwise OR of two 128-bit vectors of [4 x float].
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvt_ss2si(__m128 __a)
Converts a float value contained in the lower 32 bits of a vector of [4 x float] into a 32-bit intege...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pu8(__m64 __a, __m64 __b)
Compares each of the corresponding packed 8-bit unsigned integer values of the 64-bit integer vectors...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu8_ps(__m64 __a)
Converts the lower four unsigned 8-bit integer values from a 64-bit vector of [8 x u8] into a 128-bit...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands for inequality and returns th...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_and_ps(__m128 __a, __m128 __b)
Performs a bitwise AND of two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pi16(__m64 __a, __m64 __b)
Compares each of the corresponding packed 16-bit integer values of the 64-bit integer vectors...
static __inline__ vector signed char __ATTRS_o_ai vec_abs(vector signed char __a)
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
unsigned long element_selector_32
__vector unsigned char vmin
#define _mm_extract_pi16(a, n)
Extracts 16-bit element from a 64-bit vector of [4 x i16] and returns it, as specified by the immedia...
#define _mm_shuffle_ps(a, b, mask)
Selects 4 float values from the 128-bit operands of [4 x float], as specified by the immediate value ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadu_ps(const float *__p)
Loads a 128-bit floating-point vector of [4 x float] from an unaligned memory location.
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi16_ps(__m64 __a)
Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ps(const float *__p)
Loads a 128-bit floating-point vector of [4 x float] from an aligned memory location.
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi16(__m128 __a)
Converts each single-precision floating-point element of a 128-bit floating-point vector of [4 x floa...
static __inline__ vector signed char __ATTRS_o_ai vec_mergeh(vector signed char __a, vector signed char __b)
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pi(__m64 *__p, __m128 __a)
Stores the lower 64 bits of a 128-bit vector of [4 x float] to a memory location. ...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvt_ps2pi(__m128 __a)
Converts two low-order float values in a 128-bit vector of [4 x float] into a 64-bit vector of [2 x i...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
static __inline__ vector signed char __ATTRS_o_ai vec_min(vector signed char __a, vector signed char __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_stream_pi(__m64 *__p, __m64 __a)
Stores a 64-bit integer in the specified aligned memory location.
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtss_si32(__m128 __a)
Converts a float value contained in the lower 32 bits of a vector of [4 x float] into a 32-bit intege...