50798 – [SIMD] code not recognized as all_true

LLVM Bugzilla is read-only and represents the historical archive of all LLVM issues filed before November 26, 2021. Use GitHub to submit LLVM bugs.

Bug 50798 - [SIMD] code not recognized as all_true

Summary: [SIMD] code not recognized as all_true

Status:	CONFIRMED

Alias:	None

Product:	libraries
Classification:	Unclassified
Component:	Backend: WebAssembly (show other bugs)
Version:	trunk
Hardware:	PC Windows NT

Importance:	P enhancement
Assignee:	Unassigned LLVM Bugs

URL:
Keywords:

Depends on:
Blocks:

Reported:	2021-06-22 06:59 PDT by Evan Nemerson
Modified:	2021-07-30 07:43 PDT (History)
CC List:	3 users (show)

See Also:	50796
Fixed By Commit(s):

Attachments
Add an attachment (proposed patch, testcase, etc.)

Note: You need to log in before you can comment on or make changes to this bug.

Description Evan Nemerson 2021-06-22 06:59:02 PDT

LLVM doesn't recognize that it can optimize these functions to an *.all_true instruction.  This one might be a bit too specialized; it doesn't surprise me at all that LLVM doesn't generate an *.all_true instruction.  Still, it would be nice since the scalar version is pretty bad.

I've included an OpenMP SIMD annotation to try to help the compiler, but obviously it would be better if it weren't required.

Here is the example, or on Compiler Explorer at https://godbolt.org/z/x3xshzYrf if you prefer:


#include <stdint.h>

#pragma clang diagnostic ignored "-Wmissing-prototypes"

/* Because of <https://bugs.llvm.org/show_bug.cgi?id=45959>, not
 * a WASM issue. */
#pragma clang diagnostic ignored "-Wsign-conversion"

/* 16-byte vector types declared with the vector_size attribute. Each name
 * gives the lane type and lane count: 16 x int8_t, 8 x int16_t,
 * 4 x int32_t and 2 x int64_t. */
typedef int8_t i8x16 __attribute__((__vector_size__(16)));
typedef int16_t i16x8 __attribute__((__vector_size__(16)));
typedef int32_t i32x4 __attribute__((__vector_size__(16)));
typedef int64_t i64x2 __attribute__((__vector_size__(16)));

/* Folds all 16 lanes of `a` together with bitwise AND, starting from an
 * all-ones accumulator, and returns 1 if the folded value is nonzero,
 * 0 otherwise.
 *
 * NOTE(review): this is not the same as "every lane is nonzero". Lanes
 * such as 1 and 2 are both nonzero but AND to 0. Comment 1 of this report
 * supersedes this version with a corrected test.
 *
 * The OpenMP pragma declares the loop as a SIMD AND-reduction over `r`;
 * the report says it was added to try to help the compiler. */
int
i8x16_all_true(i8x16 a) {
    int8_t r = ~0;
    #pragma omp simd reduction(&:r)
    for (int i = 0 ; i < 16 ; i++) {
        r &= a[i];
    }
    return !!r;
}

/* Reference version: returns the result of the
 * __builtin_wasm_all_true_i8x16 builtin applied to `a`.
 * NOTE(review): presumably this lowers to the i8x16.all_true instruction
 * that the report wants for the loop version -- confirm on the linked
 * Compiler Explorer page. */
int
i8x16_all_true_intrin(i8x16 a) {
    return __builtin_wasm_all_true_i8x16(a);
}

/* Folds all 8 lanes of `a` together with bitwise AND, starting from an
 * all-ones accumulator, and returns 1 if the folded value is nonzero,
 * 0 otherwise.
 *
 * NOTE(review): this is not the same as "every lane is nonzero". Lanes
 * such as 1 and 2 are both nonzero but AND to 0. Comment 1 of this report
 * supersedes this version with a corrected test.
 *
 * The OpenMP pragma declares the loop as a SIMD AND-reduction over `r`. */
int
i16x8_all_true(i16x8 a) {
    int16_t r = ~0;
    #pragma omp simd reduction(&:r)
    for (int i = 0 ; i < 8 ; i++) {
        r &= a[i];
    }
    return !!r;
}

/* Reference version: returns the result of the
 * __builtin_wasm_all_true_i16x8 builtin applied to `a`.
 * NOTE(review): presumably this lowers to the i16x8.all_true instruction
 * that the report wants for the loop version -- confirm on the linked
 * Compiler Explorer page. */
int
i16x8_all_true_intrin(i16x8 a) {
    return __builtin_wasm_all_true_i16x8(a);
}

/* Folds all 4 lanes of `a` together with bitwise AND, starting from an
 * all-ones accumulator, and returns 1 if the folded value is nonzero,
 * 0 otherwise.
 *
 * NOTE(review): this is not the same as "every lane is nonzero". Lanes
 * such as 1 and 2 are both nonzero but AND to 0. Comment 1 of this report
 * supersedes this version with a corrected test.
 *
 * The OpenMP pragma declares the loop as a SIMD AND-reduction over `r`. */
int
i32x4_all_true(i32x4 a) {
    int32_t r = ~0;
    #pragma omp simd reduction(&:r)
    for (int i = 0 ; i < 4 ; i++) {
        r &= a[i];
    }
    return !!r;
}

/* Reference version: returns the result of the
 * __builtin_wasm_all_true_i32x4 builtin applied to `a`.
 * NOTE(review): presumably this lowers to the i32x4.all_true instruction
 * that the report wants for the loop version -- confirm on the linked
 * Compiler Explorer page. */
int
i32x4_all_true_intrin(i32x4 a) {
    return __builtin_wasm_all_true_i32x4(a);
}

/* Folds both lanes of `a` together with bitwise AND, starting from an
 * all-ones accumulator, and returns 1 if the folded value is nonzero,
 * 0 otherwise.
 *
 * NOTE(review): this is not the same as "every lane is nonzero". Lanes
 * such as 1 and 2 are both nonzero but AND to 0. Comment 1 of this report
 * supersedes this version with a corrected test.
 *
 * The OpenMP pragma declares the loop as a SIMD AND-reduction over `r`. */
int
i64x2_all_true(i64x2 a) {
    int64_t r = ~0;
    #pragma omp simd reduction(&:r)
    for (int i = 0 ; i < 2 ; i++) {
        r &= a[i];
    }
    return !!r;
}

/* Reference version: returns the result of the
 * __builtin_wasm_all_true_i64x2 builtin applied to `a`.
 * NOTE(review): presumably this lowers to the i64x2.all_true instruction
 * that the report wants for the loop version -- confirm on the linked
 * Compiler Explorer page. */
int
i64x2_all_true_intrin(i64x2 a) {
    return __builtin_wasm_all_true_i64x2(a);
}

Comment 1 Evan Nemerson 2021-06-22 07:02:39 PDT

Sorry, copied from the wrong tab, here is a corrected test (Compiler Explorer: https://godbolt.org/z/ref5rf19z):


#include <stdint.h>

#pragma clang diagnostic ignored "-Wmissing-prototypes"

/* Because of <https://bugs.llvm.org/show_bug.cgi?id=45959>, not
 * a WASM issue. */
#pragma clang diagnostic ignored "-Wsign-conversion"

/* 16-byte vector types declared with the vector_size attribute. Each name
 * gives the lane type and lane count: 16 x int8_t, 8 x int16_t,
 * 4 x int32_t and 2 x int64_t. */
typedef int8_t i8x16 __attribute__((__vector_size__(16)));
typedef int16_t i16x8 __attribute__((__vector_size__(16)));
typedef int32_t i32x4 __attribute__((__vector_size__(16)));
typedef int64_t i64x2 __attribute__((__vector_size__(16)));

/* Returns 1 when every one of the 16 lanes of `a` is nonzero, 0 otherwise.
 * Each lane is normalized to 0 or 1 with `!!` before being ANDed into `r`,
 * which starts at 1, so a single zero lane clears the result.
 *
 * The OpenMP pragma declares the loop as a SIMD AND-reduction over `r`;
 * the report says it was added to try to help the compiler and ideally
 * would not be required. */
int
i8x16_all_true(i8x16 a) {
    int8_t r = 1;
    #pragma omp simd reduction(&:r)
    for (int i = 0 ; i < 16 ; i++) {
        r &= !!a[i];
    }
    return r;
}

/* Reference version: returns the result of the
 * __builtin_wasm_all_true_i8x16 builtin applied to `a`.
 * NOTE(review): presumably this lowers to the i8x16.all_true instruction
 * that the report wants for the loop version -- confirm on the linked
 * Compiler Explorer page. */
int
i8x16_all_true_intrin(i8x16 a) {
    return __builtin_wasm_all_true_i8x16(a);
}

/* Returns 1 when every one of the 8 lanes of `a` is nonzero, 0 otherwise.
 * Each lane is normalized to 0 or 1 with `!!` before being ANDed into `r`,
 * which starts at 1, so a single zero lane clears the result.
 *
 * The OpenMP pragma declares the loop as a SIMD AND-reduction over `r`. */
int
i16x8_all_true(i16x8 a) {
    int16_t r = 1;
    #pragma omp simd reduction(&:r)
    for (int i = 0 ; i < 8 ; i++) {
        r &= !!a[i];
    }
    return r;
}

/* Reference version: returns the result of the
 * __builtin_wasm_all_true_i16x8 builtin applied to `a`.
 * NOTE(review): presumably this lowers to the i16x8.all_true instruction
 * that the report wants for the loop version -- confirm on the linked
 * Compiler Explorer page. */
int
i16x8_all_true_intrin(i16x8 a) {
    return __builtin_wasm_all_true_i16x8(a);
}

/* Returns 1 when every one of the 4 lanes of `a` is nonzero, 0 otherwise.
 * Each lane is normalized to 0 or 1 with `!!` before being ANDed into `r`,
 * which starts at 1, so a single zero lane clears the result.
 *
 * The OpenMP pragma declares the loop as a SIMD AND-reduction over `r`. */
int
i32x4_all_true(i32x4 a) {
    int32_t r = 1;
    #pragma omp simd reduction(&:r)
    for (int i = 0 ; i < 4 ; i++) {
        r &= !!a[i];
    }
    return r;
}

/* Reference version: returns the result of the
 * __builtin_wasm_all_true_i32x4 builtin applied to `a`.
 * NOTE(review): presumably this lowers to the i32x4.all_true instruction
 * that the report wants for the loop version -- confirm on the linked
 * Compiler Explorer page. */
int
i32x4_all_true_intrin(i32x4 a) {
    return __builtin_wasm_all_true_i32x4(a);
}

/* Returns 1 when both lanes of `a` are nonzero, 0 otherwise.
 * Each lane is normalized to 0 or 1 with `!!` before being ANDed into `r`,
 * which starts at 1, so a single zero lane clears the result. `r` is an
 * int64_t that only ever holds 0 or 1, so the conversion to the int
 * return type does not lose information.
 *
 * The OpenMP pragma declares the loop as a SIMD AND-reduction over `r`. */
int
i64x2_all_true(i64x2 a) {
    int64_t r = 1;
    #pragma omp simd reduction(&:r)
    for (int i = 0 ; i < 2 ; i++) {
        r &= !!a[i];
    }
    return r;
}

/* Reference version: returns the result of the
 * __builtin_wasm_all_true_i64x2 builtin applied to `a`.
 * NOTE(review): presumably this lowers to the i64x2.all_true instruction
 * that the report wants for the loop version -- confirm on the linked
 * Compiler Explorer page. */
int
i64x2_all_true_intrin(i64x2 a) {
    return __builtin_wasm_all_true_i64x2(a);
}

Comment 2 Thomas Lively 2021-07-30 07:43:46 PDT

This one is going to be a bit trickier than https://bugs.llvm.org/show_bug.cgi?id=50796 because I don't believe there is an existing reduction intrinsic corresponding to this operation.