LLVM 20.0.0git
|
Go to the source code of this file.
Macros | |
#define | _mm_shuffle_ps2(a, b, c) |
#define | LO_IMM8 0x88 |
#define | HI_IMM8 0xdd |
Functions | |
INLINE __m128i | loadu_128 (const uint8_t src[16]) |
INLINE __m256i | loadu_256 (const uint8_t src[32]) |
INLINE __m512i | loadu_512 (const uint8_t src[64]) |
INLINE void | storeu_128 (__m128i src, uint8_t dest[16]) |
INLINE void | storeu_256 (__m256i src, uint8_t dest[16]) |
INLINE __m128i | add_128 (__m128i a, __m128i b) |
INLINE __m256i | add_256 (__m256i a, __m256i b) |
INLINE __m512i | add_512 (__m512i a, __m512i b) |
INLINE __m128i | xor_128 (__m128i a, __m128i b) |
INLINE __m256i | xor_256 (__m256i a, __m256i b) |
INLINE __m512i | xor_512 (__m512i a, __m512i b) |
INLINE __m128i | set1_128 (uint32_t x) |
INLINE __m256i | set1_256 (uint32_t x) |
INLINE __m512i | set1_512 (uint32_t x) |
INLINE __m128i | set4 (uint32_t a, uint32_t b, uint32_t c, uint32_t d) |
INLINE __m128i | rot16_128 (__m128i x) |
INLINE __m256i | rot16_256 (__m256i x) |
INLINE __m512i | rot16_512 (__m512i x) |
INLINE __m128i | rot12_128 (__m128i x) |
INLINE __m256i | rot12_256 (__m256i x) |
INLINE __m512i | rot12_512 (__m512i x) |
INLINE __m128i | rot8_128 (__m128i x) |
INLINE __m256i | rot8_256 (__m256i x) |
INLINE __m512i | rot8_512 (__m512i x) |
INLINE __m128i | rot7_128 (__m128i x) |
INLINE __m256i | rot7_256 (__m256i x) |
INLINE __m512i | rot7_512 (__m512i x) |
INLINE void | g1 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m) |
INLINE void | g2 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m) |
INLINE void | diagonalize (__m128i *row0, __m128i *row2, __m128i *row3) |
INLINE void | undiagonalize (__m128i *row0, __m128i *row2, __m128i *row3) |
INLINE void | compress_pre (__m128i rows[4], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags) |
void | blake3_compress_xof_avx512 (const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64]) |
void | blake3_compress_in_place_avx512 (uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags) |
INLINE void | round_fn4 (__m128i v[16], __m128i m[16], size_t r) |
INLINE void | transpose_vecs_128 (__m128i vecs[4]) |
INLINE void | transpose_msg_vecs4 (const uint8_t *const *inputs, size_t block_offset, __m128i out[16]) |
INLINE void | load_counters4 (uint64_t counter, bool increment_counter, __m128i *out_lo, __m128i *out_hi) |
static void | blake3_hash4_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out) |
INLINE void | round_fn8 (__m256i v[16], __m256i m[16], size_t r) |
INLINE void | transpose_vecs_256 (__m256i vecs[8]) |
INLINE void | transpose_msg_vecs8 (const uint8_t *const *inputs, size_t block_offset, __m256i out[16]) |
INLINE void | load_counters8 (uint64_t counter, bool increment_counter, __m256i *out_lo, __m256i *out_hi) |
static void | blake3_hash8_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out) |
INLINE void | round_fn16 (__m512i v[16], __m512i m[16], size_t r) |
INLINE __m512i | unpack_lo_128 (__m512i a, __m512i b) |
INLINE __m512i | unpack_hi_128 (__m512i a, __m512i b) |
INLINE void | transpose_vecs_512 (__m512i vecs[16]) |
INLINE void | transpose_msg_vecs16 (const uint8_t *const *inputs, size_t block_offset, __m512i out[16]) |
INLINE void | load_counters16 (uint64_t counter, bool increment_counter, __m512i *out_lo, __m512i *out_hi) |
static void | blake3_hash16_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out) |
INLINE void | hash_one_avx512 (const uint8_t *input, size_t blocks, const uint32_t key[8], uint64_t counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) |
void | blake3_hash_many_avx512 (const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out) |
#define _mm_shuffle_ps2(a, b, c)
Definition at line 5 of file blake3_avx512.c.
#define HI_IMM8 0xdd |
Definition at line 935 of file blake3_avx512.c.
#define LO_IMM8 0x88 |
Definition at line 928 of file blake3_avx512.c.
INLINE __m128i add_128 | ( | __m128i | a, |
__m128i | b | ||
) |
Definition at line 29 of file blake3_avx512.c.
Referenced by g1(), g2(), and round_fn4().
INLINE __m256i add_256 | ( | __m256i | a, |
__m256i | b | ||
) |
Definition at line 31 of file blake3_avx512.c.
Referenced by round_fn8().
INLINE __m512i add_512 | ( | __m512i | a, |
__m512i | b | ||
) |
Definition at line 33 of file blake3_avx512.c.
Referenced by round_fn16().
void blake3_compress_in_place_avx512 | ( | uint32_t | cv[8], |
const uint8_t | block[BLAKE3_BLOCK_LEN], | ||
uint8_t | block_len, | ||
uint64_t | counter, | ||
uint8_t | flags | ||
) |
Definition at line 299 of file blake3_avx512.c.
References block, compress_pre(), storeu_128(), and xor_128().
void blake3_compress_xof_avx512 | ( | const uint32_t | cv[8], |
const uint8_t | block[BLAKE3_BLOCK_LEN], | ||
uint8_t | block_len, | ||
uint64_t | counter, | ||
uint8_t | flags, | ||
uint8_t | out[64] | ||
) |
Definition at line 287 of file blake3_avx512.c.
References block, compress_pre(), loadu_128(), storeu_128(), and xor_128().
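static void blake3_hash16_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)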
Definition at line 1060 of file blake3_avx512.c.
References BLAKE3_BLOCK_LEN, block, blocks, IV, load_counters16(), round_fn16(), set1_512(), transpose_msg_vecs16(), transpose_vecs_512(), and xor_512().
Referenced by blake3_hash_many_avx512().
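static void blake3_hash4_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)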
Definition at line 492 of file blake3_avx512.c.
References BLAKE3_BLOCK_LEN, block, blocks, IV, load_counters4(), round_fn4(), set1_128(), storeu_128(), transpose_msg_vecs4(), transpose_vecs_128(), and xor_128().
Referenced by blake3_hash_many_avx512().
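static void blake3_hash8_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)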
Definition at line 747 of file blake3_avx512.c.
References BLAKE3_BLOCK_LEN, block, blocks, IV, load_counters8(), round_fn8(), set1_256(), storeu_256(), transpose_msg_vecs8(), transpose_vecs_256(), and xor_256().
Referenced by blake3_hash_many_avx512().
void blake3_hash_many_avx512 | ( | const uint8_t *const * | inputs, |
size_t | num_inputs, | ||
size_t | blocks, | ||
const uint32_t | key[8], | ||
uint64_t | counter, | ||
bool | increment_counter, | ||
uint8_t | flags, | ||
uint8_t | flags_start, | ||
uint8_t | flags_end, | ||
uint8_t * | out | ||
) |
Definition at line 1162 of file blake3_avx512.c.
References blake3_hash16_avx512(), blake3_hash4_avx512(), blake3_hash8_avx512(), BLAKE3_OUT_LEN, blocks, and hash_one_avx512().
INLINE void compress_pre | ( | __m128i | rows[4], |
const uint32_t | cv[8], | ||
const uint8_t | block[BLAKE3_BLOCK_LEN], | ||
uint8_t | block_len, | ||
uint64_t | counter, | ||
uint8_t | flags | ||
) |
Definition at line 116 of file blake3_avx512.c.
References _mm_shuffle_ps2, block, counter_high(), counter_low(), diagonalize(), g1(), g2(), IV, loadu_128(), set4(), and undiagonalize().
Referenced by blake3_compress_in_place_avx512(), and blake3_compress_xof_avx512().
INLINE void diagonalize | ( | __m128i * | row0, |
__m128i * | row2, | ||
__m128i * | row3 | ||
) |
Definition at line 104 of file blake3_avx512.c.
Referenced by compress_pre().
INLINE void g1 | ( | __m128i * | row0, |
__m128i * | row1, | ||
__m128i * | row2, | ||
__m128i * | row3, | ||
__m128i | m | ||
) |
Definition at line 81 of file blake3_avx512.c.
References add_128(), rot12_128(), rot16_128(), and xor_128().
Referenced by compress_pre().
INLINE void g2 | ( | __m128i * | row0, |
__m128i * | row1, | ||
__m128i * | row2, | ||
__m128i * | row3, | ||
__m128i | m | ||
) |
Definition at line 91 of file blake3_avx512.c.
References add_128(), rot7_128(), rot8_128(), and xor_128().
Referenced by compress_pre().
INLINE void hash_one_avx512 | ( | const uint8_t * | input, |
size_t | blocks, | ||
const uint32_t | key[8], | ||
uint64_t | counter, | ||
uint8_t | flags, | ||
uint8_t | flags_start, | ||
uint8_t | flags_end, | ||
uint8_t | out[BLAKE3_OUT_LEN] | ||
) |
Definition at line 1142 of file blake3_avx512.c.
References BLAKE3_BLOCK_LEN, blake3_compress_in_place_avx512, BLAKE3_KEY_LEN, BLAKE3_OUT_LEN, and blocks.
Referenced by blake3_hash_many_avx512().
INLINE void load_counters16 | ( | uint64_t | counter, |
bool | increment_counter, | ||
__m512i * | out_lo, | ||
__m512i * | out_hi | ||
) |
Definition at line 1047 of file blake3_avx512.c.
Referenced by blake3_hash16_avx512().
INLINE void load_counters4 | ( | uint64_t | counter, |
bool | increment_counter, | ||
__m128i * | out_lo, | ||
__m128i * | out_hi | ||
) |
Definition at line 479 of file blake3_avx512.c.
Referenced by blake3_hash4_avx512().
INLINE void load_counters8 | ( | uint64_t | counter, |
bool | increment_counter, | ||
__m256i * | out_lo, | ||
__m256i * | out_hi | ||
) |
Definition at line 734 of file blake3_avx512.c.
Referenced by blake3_hash8_avx512().
INLINE __m128i loadu_128 (const uint8_t src[16])
Definition at line 9 of file blake3_avx512.c.
Referenced by blake3_compress_xof_avx512(), compress_pre(), and transpose_msg_vecs4().
INLINE __m256i loadu_256 (const uint8_t src[32])
Definition at line 13 of file blake3_avx512.c.
Referenced by transpose_msg_vecs8().
INLINE __m512i loadu_512 (const uint8_t src[64])
Definition at line 17 of file blake3_avx512.c.
Referenced by transpose_msg_vecs16().
INLINE __m128i rot12_128 | ( | __m128i | x | ) |
Definition at line 57 of file blake3_avx512.c.
Referenced by g1(), and round_fn4().
INLINE __m256i rot12_256 | ( | __m256i | x | ) |
Definition at line 59 of file blake3_avx512.c.
Referenced by round_fn8().
INLINE __m512i rot12_512 | ( | __m512i | x | ) |
Definition at line 61 of file blake3_avx512.c.
Referenced by round_fn16().
INLINE __m128i rot16_128 | ( | __m128i | x | ) |
Definition at line 51 of file blake3_avx512.c.
Referenced by g1(), and round_fn4().
INLINE __m256i rot16_256 | ( | __m256i | x | ) |
Definition at line 53 of file blake3_avx512.c.
Referenced by round_fn8().
INLINE __m512i rot16_512 | ( | __m512i | x | ) |
Definition at line 55 of file blake3_avx512.c.
Referenced by round_fn16().
INLINE __m128i rot7_128 | ( | __m128i | x | ) |
Definition at line 69 of file blake3_avx512.c.
Referenced by g2(), and round_fn4().
INLINE __m256i rot7_256 | ( | __m256i | x | ) |
Definition at line 71 of file blake3_avx512.c.
Referenced by round_fn8().
INLINE __m512i rot7_512 | ( | __m512i | x | ) |
Definition at line 73 of file blake3_avx512.c.
Referenced by round_fn16().
INLINE __m128i rot8_128 | ( | __m128i | x | ) |
Definition at line 63 of file blake3_avx512.c.
Referenced by g2(), and round_fn4().
INLINE __m256i rot8_256 | ( | __m256i | x | ) |
Definition at line 65 of file blake3_avx512.c.
Referenced by round_fn8().
INLINE __m512i rot8_512 | ( | __m512i | x | ) |
Definition at line 67 of file blake3_avx512.c.
Referenced by round_fn16().
INLINE void round_fn16 | ( | __m512i | v[16], |
__m512i | m[16], | ||
size_t | r | ||
) |
Definition at line 811 of file blake3_avx512.c.
References add_512(), MSG_SCHEDULE, rot12_512(), rot16_512(), rot7_512(), rot8_512(), and xor_512().
Referenced by blake3_hash16_avx512().
INLINE void round_fn4 | ( | __m128i | v[16], |
__m128i | m[16], | ||
size_t | r | ||
) |
Definition at line 315 of file blake3_avx512.c.
References add_128(), MSG_SCHEDULE, rot12_128(), rot16_128(), rot7_128(), rot8_128(), and xor_128().
Referenced by blake3_hash4_avx512().
INLINE void round_fn8 | ( | __m256i | v[16], |
__m256i | m[16], | ||
size_t | r | ||
) |
Definition at line 559 of file blake3_avx512.c.
References add_256(), MSG_SCHEDULE, rot12_256(), rot16_256(), rot7_256(), rot8_256(), and xor_256().
Referenced by blake3_hash8_avx512().
INLINE __m128i set1_128 (uint32_t x)
Definition at line 41 of file blake3_avx512.c.
Referenced by blake3_hash4_avx512().
INLINE __m256i set1_256 (uint32_t x)
Definition at line 43 of file blake3_avx512.c.
Referenced by blake3_hash8_avx512().
INLINE __m512i set1_512 (uint32_t x)
Definition at line 45 of file blake3_avx512.c.
Referenced by blake3_hash16_avx512().
INLINE __m128i set4 (uint32_t a, uint32_t b, uint32_t c, uint32_t d)
Definition at line 47 of file blake3_avx512.c.
Referenced by compress_pre().
INLINE void storeu_128 | ( | __m128i | src, |
uint8_t | dest[16] | ||
) |
Definition at line 21 of file blake3_avx512.c.
Referenced by blake3_compress_in_place_avx512(), blake3_compress_xof_avx512(), and blake3_hash4_avx512().
INLINE void storeu_256 | ( | __m256i | src, |
uint8_t | dest[16] | ||
) |
Definition at line 25 of file blake3_avx512.c.
Referenced by blake3_hash8_avx512().
INLINE void transpose_msg_vecs16 | ( | const uint8_t *const * | inputs, |
size_t | block_offset, | ||
__m512i | out[16] | ||
) |
Definition at line 1023 of file blake3_avx512.c.
References loadu_512(), and transpose_vecs_512().
Referenced by blake3_hash16_avx512().
INLINE void transpose_msg_vecs4 | ( | const uint8_t *const * | inputs, |
size_t | block_offset, | ||
__m128i | out[16] | ||
) |
Definition at line 452 of file blake3_avx512.c.
References loadu_128(), and transpose_vecs_128().
Referenced by blake3_hash4_avx512().
INLINE void transpose_msg_vecs8 | ( | const uint8_t *const * | inputs, |
size_t | block_offset, | ||
__m256i | out[16] | ||
) |
Definition at line 709 of file blake3_avx512.c.
References loadu_256(), and transpose_vecs_256().
Referenced by blake3_hash8_avx512().
INLINE void transpose_vecs_128 | ( | __m128i | vecs[4] | ) |
Definition at line 431 of file blake3_avx512.c.
Referenced by blake3_hash4_avx512(), and transpose_msg_vecs4().
INLINE void transpose_vecs_256 | ( | __m256i | vecs[8] | ) |
Definition at line 675 of file blake3_avx512.c.
Referenced by blake3_hash8_avx512(), and transpose_msg_vecs8().
INLINE void transpose_vecs_512 | ( | __m512i | vecs[16] | ) |
Definition at line 941 of file blake3_avx512.c.
References unpack_hi_128(), and unpack_lo_128().
Referenced by blake3_hash16_avx512(), and transpose_msg_vecs16().
INLINE void undiagonalize | ( | __m128i * | row0, |
__m128i * | row2, | ||
__m128i * | row3 | ||
) |
Definition at line 110 of file blake3_avx512.c.
Referenced by compress_pre().
INLINE __m512i unpack_hi_128 | ( | __m512i | a, |
__m512i | b | ||
) |
Definition at line 937 of file blake3_avx512.c.
References HI_IMM8.
Referenced by transpose_vecs_512().
INLINE __m512i unpack_lo_128 | ( | __m512i | a, |
__m512i | b | ||
) |
Definition at line 930 of file blake3_avx512.c.
References LO_IMM8.
Referenced by transpose_vecs_512().
INLINE __m128i xor_128 | ( | __m128i | a, |
__m128i | b | ||
) |
Definition at line 35 of file blake3_avx512.c.
Referenced by blake3_compress_in_place_avx512(), blake3_compress_xof_avx512(), blake3_hash4_avx512(), g1(), g2(), and round_fn4().
INLINE __m256i xor_256 | ( | __m256i | a, |
__m256i | b | ||
) |
Definition at line 37 of file blake3_avx512.c.
Referenced by blake3_hash8_avx512(), and round_fn8().
INLINE __m512i xor_512 | ( | __m512i | a, |
__m512i | b | ||
) |
Definition at line 39 of file blake3_avx512.c.
Referenced by blake3_hash16_avx512(), and round_fn16().