LLVM doesn't recognize that it can optimize these functions to an *.all_true instruction. This one might be a bit too specialized; it doesn't surprise me at all that LLVM doesn't generate an *.all_true instruction. Still, it would be nice since the scalar version is pretty bad. I've included an OpenMP SIMD annotation to try to help the compiler, but obviously it would be better if it weren't required. Here is the example (also available on Compiler Explorer at https://godbolt.org/z/x3xshzYrf, if you prefer): #include <stdint.h> #pragma clang diagnostic ignored "-Wmissing-prototypes" /* Because of <https://bugs.llvm.org/show_bug.cgi?id=45959>, not * a WASM issue. */ #pragma clang diagnostic ignored "-Wsign-conversion" typedef int8_t i8x16 __attribute__((__vector_size__(16))); typedef int16_t i16x8 __attribute__((__vector_size__(16))); typedef int32_t i32x4 __attribute__((__vector_size__(16))); typedef int64_t i64x2 __attribute__((__vector_size__(16))); int i8x16_all_true(i8x16 a) { int8_t r = ~0; #pragma omp simd reduction(&:r) for (int i = 0 ; i < 16 ; i++) { r &= a[i]; } return !!r; } int i8x16_all_true_intrin(i8x16 a) { return __builtin_wasm_all_true_i8x16(a); } int i16x8_all_true(i16x8 a) { int16_t r = ~0; #pragma omp simd reduction(&:r) for (int i = 0 ; i < 8 ; i++) { r &= a[i]; } return !!r; } int i16x8_all_true_intrin(i16x8 a) { return __builtin_wasm_all_true_i16x8(a); } int i32x4_all_true(i32x4 a) { int32_t r = ~0; #pragma omp simd reduction(&:r) for (int i = 0 ; i < 4 ; i++) { r &= a[i]; } return !!r; } int i32x4_all_true_intrin(i32x4 a) { return __builtin_wasm_all_true_i32x4(a); } int i64x2_all_true(i64x2 a) { int64_t r = ~0; #pragma omp simd reduction(&:r) for (int i = 0 ; i < 2 ; i++) { r &= a[i]; } return !!r; } int i64x2_all_true_intrin(i64x2 a) { return __builtin_wasm_all_true_i64x2(a); }
Sorry, I copied from the wrong tab; here is a corrected test (Compiler Explorer: https://godbolt.org/z/ref5rf19z): #include <stdint.h> #pragma clang diagnostic ignored "-Wmissing-prototypes" /* Because of <https://bugs.llvm.org/show_bug.cgi?id=45959>, not * a WASM issue. */ #pragma clang diagnostic ignored "-Wsign-conversion" typedef int8_t i8x16 __attribute__((__vector_size__(16))); typedef int16_t i16x8 __attribute__((__vector_size__(16))); typedef int32_t i32x4 __attribute__((__vector_size__(16))); typedef int64_t i64x2 __attribute__((__vector_size__(16))); int i8x16_all_true(i8x16 a) { int8_t r = 1; #pragma omp simd reduction(&:r) for (int i = 0 ; i < 16 ; i++) { r &= !!a[i]; } return r; } int i8x16_all_true_intrin(i8x16 a) { return __builtin_wasm_all_true_i8x16(a); } int i16x8_all_true(i16x8 a) { int16_t r = 1; #pragma omp simd reduction(&:r) for (int i = 0 ; i < 8 ; i++) { r &= !!a[i]; } return r; } int i16x8_all_true_intrin(i16x8 a) { return __builtin_wasm_all_true_i16x8(a); } int i32x4_all_true(i32x4 a) { int32_t r = 1; #pragma omp simd reduction(&:r) for (int i = 0 ; i < 4 ; i++) { r &= !!a[i]; } return r; } int i32x4_all_true_intrin(i32x4 a) { return __builtin_wasm_all_true_i32x4(a); } int i64x2_all_true(i64x2 a) { int64_t r = 1; #pragma omp simd reduction(&:r) for (int i = 0 ; i < 2 ; i++) { r &= !!a[i]; } return r; } int i64x2_all_true_intrin(i64x2 a) { return __builtin_wasm_all_true_i64x2(a); }
This one is going to be a bit trickier than https://bugs.llvm.org/show_bug.cgi?id=50796 because I don't believe there is an existing reduction intrinsic corresponding to this operation.