LLVM 20.0.0git
Macros | Functions
blake3_avx512.c File Reference
#include "blake3_impl.h"
#include <immintrin.h>

Go to the source code of this file.

Macros

#define _mm_shuffle_ps2(a, b, c)
 
#define LO_IMM8   0x88
 
#define HI_IMM8   0xdd
 

Functions

INLINE __m128i loadu_128 (const uint8_t src[16])
 
INLINE __m256i loadu_256 (const uint8_t src[32])
 
INLINE __m512i loadu_512 (const uint8_t src[64])
 
INLINE void storeu_128 (__m128i src, uint8_t dest[16])
 
INLINE void storeu_256 (__m256i src, uint8_t dest[16])
 
INLINE __m128i add_128 (__m128i a, __m128i b)
 
INLINE __m256i add_256 (__m256i a, __m256i b)
 
INLINE __m512i add_512 (__m512i a, __m512i b)
 
INLINE __m128i xor_128 (__m128i a, __m128i b)
 
INLINE __m256i xor_256 (__m256i a, __m256i b)
 
INLINE __m512i xor_512 (__m512i a, __m512i b)
 
INLINE __m128i set1_128 (uint32_t x)
 
INLINE __m256i set1_256 (uint32_t x)
 
INLINE __m512i set1_512 (uint32_t x)
 
INLINE __m128i set4 (uint32_t a, uint32_t b, uint32_t c, uint32_t d)
 
INLINE __m128i rot16_128 (__m128i x)
 
INLINE __m256i rot16_256 (__m256i x)
 
INLINE __m512i rot16_512 (__m512i x)
 
INLINE __m128i rot12_128 (__m128i x)
 
INLINE __m256i rot12_256 (__m256i x)
 
INLINE __m512i rot12_512 (__m512i x)
 
INLINE __m128i rot8_128 (__m128i x)
 
INLINE __m256i rot8_256 (__m256i x)
 
INLINE __m512i rot8_512 (__m512i x)
 
INLINE __m128i rot7_128 (__m128i x)
 
INLINE __m256i rot7_256 (__m256i x)
 
INLINE __m512i rot7_512 (__m512i x)
 
INLINE void g1 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
 
INLINE void g2 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
 
INLINE void diagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
 
INLINE void undiagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
 
INLINE void compress_pre (__m128i rows[4], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
 
void blake3_compress_xof_avx512 (const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
 
void blake3_compress_in_place_avx512 (uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
 
INLINE void round_fn4 (__m128i v[16], __m128i m[16], size_t r)
 
INLINE void transpose_vecs_128 (__m128i vecs[4])
 
INLINE void transpose_msg_vecs4 (const uint8_t *const *inputs, size_t block_offset, __m128i out[16])
 
INLINE void load_counters4 (uint64_t counter, bool increment_counter, __m128i *out_lo, __m128i *out_hi)
 
static void blake3_hash4_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 
INLINE void round_fn8 (__m256i v[16], __m256i m[16], size_t r)
 
INLINE void transpose_vecs_256 (__m256i vecs[8])
 
INLINE void transpose_msg_vecs8 (const uint8_t *const *inputs, size_t block_offset, __m256i out[16])
 
INLINE void load_counters8 (uint64_t counter, bool increment_counter, __m256i *out_lo, __m256i *out_hi)
 
static void blake3_hash8_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 
INLINE void round_fn16 (__m512i v[16], __m512i m[16], size_t r)
 
INLINE __m512i unpack_lo_128 (__m512i a, __m512i b)
 
INLINE __m512i unpack_hi_128 (__m512i a, __m512i b)
 
INLINE void transpose_vecs_512 (__m512i vecs[16])
 
INLINE void transpose_msg_vecs16 (const uint8_t *const *inputs, size_t block_offset, __m512i out[16])
 
INLINE void load_counters16 (uint64_t counter, bool increment_counter, __m512i *out_lo, __m512i *out_hi)
 
static void blake3_hash16_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 
INLINE void hash_one_avx512 (const uint8_t *input, size_t blocks, const uint32_t key[8], uint64_t counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
 
void blake3_hash_many_avx512 (const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 

Macro Definition Documentation

◆ _mm_shuffle_ps2

#define _mm_shuffle_ps2 (   a,
  b,
  c 
)
Value:
(_mm_castps_si128( \
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))

Definition at line 5 of file blake3_avx512.c.

◆ HI_IMM8

#define HI_IMM8   0xdd

Definition at line 935 of file blake3_avx512.c.

◆ LO_IMM8

#define LO_IMM8   0x88

Definition at line 928 of file blake3_avx512.c.

Function Documentation

◆ add_128()

INLINE __m128i add_128 ( __m128i  a,
__m128i  b 
)

Definition at line 29 of file blake3_avx512.c.

Referenced by g1(), g2(), and round_fn4().

◆ add_256()

INLINE __m256i add_256 ( __m256i  a,
__m256i  b 
)

Definition at line 31 of file blake3_avx512.c.

Referenced by round_fn8().

◆ add_512()

INLINE __m512i add_512 ( __m512i  a,
__m512i  b 
)

Definition at line 33 of file blake3_avx512.c.

Referenced by round_fn16().

◆ blake3_compress_in_place_avx512()

void blake3_compress_in_place_avx512 ( uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags 
)

Definition at line 299 of file blake3_avx512.c.

References block, compress_pre(), storeu_128(), and xor_128().

◆ blake3_compress_xof_avx512()

void blake3_compress_xof_avx512 ( const uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags,
uint8_t  out[64] 
)

Definition at line 287 of file blake3_avx512.c.

References block, compress_pre(), loadu_128(), storeu_128(), and xor_128().

◆ blake3_hash16_avx512()

static void blake3_hash16_avx512 ( const uint8_t *const *  inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)
static

◆ blake3_hash4_avx512()

static void blake3_hash4_avx512 ( const uint8_t *const *  inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)
static

◆ blake3_hash8_avx512()

static void blake3_hash8_avx512 ( const uint8_t *const *  inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)
static

◆ blake3_hash_many_avx512()

void blake3_hash_many_avx512 ( const uint8_t *const *  inputs,
size_t  num_inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)

◆ compress_pre()

INLINE void compress_pre ( __m128i  rows[4],
const uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags 
)

◆ diagonalize()

INLINE void diagonalize ( __m128i *  row0,
__m128i *  row2,
__m128i *  row3 
)

Definition at line 104 of file blake3_avx512.c.

Referenced by compress_pre().

◆ g1()

INLINE void g1 ( __m128i *  row0,
__m128i *  row1,
__m128i *  row2,
__m128i *  row3,
__m128i  m 
)

Definition at line 81 of file blake3_avx512.c.

References add_128(), rot12_128(), rot16_128(), and xor_128().

Referenced by compress_pre().

◆ g2()

INLINE void g2 ( __m128i *  row0,
__m128i *  row1,
__m128i *  row2,
__m128i *  row3,
__m128i  m 
)

Definition at line 91 of file blake3_avx512.c.

References add_128(), rot7_128(), rot8_128(), and xor_128().

Referenced by compress_pre().

◆ hash_one_avx512()

INLINE void hash_one_avx512 ( const uint8_t *  input,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t  out[BLAKE3_OUT_LEN] 
)

◆ load_counters16()

INLINE void load_counters16 ( uint64_t  counter,
bool  increment_counter,
__m512i *  out_lo,
__m512i *  out_hi 
)

Definition at line 1047 of file blake3_avx512.c.

Referenced by blake3_hash16_avx512().

◆ load_counters4()

INLINE void load_counters4 ( uint64_t  counter,
bool  increment_counter,
__m128i *  out_lo,
__m128i *  out_hi 
)

Definition at line 479 of file blake3_avx512.c.

Referenced by blake3_hash4_avx512().

◆ load_counters8()

INLINE void load_counters8 ( uint64_t  counter,
bool  increment_counter,
__m256i *  out_lo,
__m256i *  out_hi 
)

Definition at line 734 of file blake3_avx512.c.

Referenced by blake3_hash8_avx512().

◆ loadu_128()

INLINE __m128i loadu_128 ( const uint8_t  src[16])

Definition at line 9 of file blake3_avx512.c.

Referenced by blake3_compress_xof_avx512(), compress_pre(), and transpose_msg_vecs4().

◆ loadu_256()

INLINE __m256i loadu_256 ( const uint8_t  src[32])

Definition at line 13 of file blake3_avx512.c.

Referenced by transpose_msg_vecs8().

◆ loadu_512()

INLINE __m512i loadu_512 ( const uint8_t  src[64])

Definition at line 17 of file blake3_avx512.c.

Referenced by transpose_msg_vecs16().

◆ rot12_128()

INLINE __m128i rot12_128 ( __m128i  x)

Definition at line 57 of file blake3_avx512.c.

Referenced by g1(), and round_fn4().

◆ rot12_256()

INLINE __m256i rot12_256 ( __m256i  x)

Definition at line 59 of file blake3_avx512.c.

Referenced by round_fn8().

◆ rot12_512()

INLINE __m512i rot12_512 ( __m512i  x)

Definition at line 61 of file blake3_avx512.c.

Referenced by round_fn16().

◆ rot16_128()

INLINE __m128i rot16_128 ( __m128i  x)

Definition at line 51 of file blake3_avx512.c.

Referenced by g1(), and round_fn4().

◆ rot16_256()

INLINE __m256i rot16_256 ( __m256i  x)

Definition at line 53 of file blake3_avx512.c.

Referenced by round_fn8().

◆ rot16_512()

INLINE __m512i rot16_512 ( __m512i  x)

Definition at line 55 of file blake3_avx512.c.

Referenced by round_fn16().

◆ rot7_128()

INLINE __m128i rot7_128 ( __m128i  x)

Definition at line 69 of file blake3_avx512.c.

Referenced by g2(), and round_fn4().

◆ rot7_256()

INLINE __m256i rot7_256 ( __m256i  x)

Definition at line 71 of file blake3_avx512.c.

Referenced by round_fn8().

◆ rot7_512()

INLINE __m512i rot7_512 ( __m512i  x)

Definition at line 73 of file blake3_avx512.c.

Referenced by round_fn16().

◆ rot8_128()

INLINE __m128i rot8_128 ( __m128i  x)

Definition at line 63 of file blake3_avx512.c.

Referenced by g2(), and round_fn4().

◆ rot8_256()

INLINE __m256i rot8_256 ( __m256i  x)

Definition at line 65 of file blake3_avx512.c.

Referenced by round_fn8().

◆ rot8_512()

INLINE __m512i rot8_512 ( __m512i  x)

Definition at line 67 of file blake3_avx512.c.

Referenced by round_fn16().

◆ round_fn16()

INLINE void round_fn16 ( __m512i  v[16],
__m512i  m[16],
size_t  r 
)

Definition at line 811 of file blake3_avx512.c.

References add_512(), MSG_SCHEDULE, rot12_512(), rot16_512(), rot7_512(), rot8_512(), and xor_512().

Referenced by blake3_hash16_avx512().

◆ round_fn4()

INLINE void round_fn4 ( __m128i  v[16],
__m128i  m[16],
size_t  r 
)

Definition at line 315 of file blake3_avx512.c.

References add_128(), MSG_SCHEDULE, rot12_128(), rot16_128(), rot7_128(), rot8_128(), and xor_128().

Referenced by blake3_hash4_avx512().

◆ round_fn8()

INLINE void round_fn8 ( __m256i  v[16],
__m256i  m[16],
size_t  r 
)

Definition at line 559 of file blake3_avx512.c.

References add_256(), MSG_SCHEDULE, rot12_256(), rot16_256(), rot7_256(), rot8_256(), and xor_256().

Referenced by blake3_hash8_avx512().

◆ set1_128()

INLINE __m128i set1_128 ( uint32_t  x)

Definition at line 41 of file blake3_avx512.c.

Referenced by blake3_hash4_avx512().

◆ set1_256()

INLINE __m256i set1_256 ( uint32_t  x)

Definition at line 43 of file blake3_avx512.c.

Referenced by blake3_hash8_avx512().

◆ set1_512()

INLINE __m512i set1_512 ( uint32_t  x)

Definition at line 45 of file blake3_avx512.c.

Referenced by blake3_hash16_avx512().

◆ set4()

INLINE __m128i set4 ( uint32_t  a,
uint32_t  b,
uint32_t  c,
uint32_t  d 
)

Definition at line 47 of file blake3_avx512.c.

Referenced by compress_pre().

◆ storeu_128()

INLINE void storeu_128 ( __m128i  src,
uint8_t  dest[16] 
)

◆ storeu_256()

INLINE void storeu_256 ( __m256i  src,
uint8_t  dest[16] 
)

Definition at line 25 of file blake3_avx512.c.

Referenced by blake3_hash8_avx512().

◆ transpose_msg_vecs16()

INLINE void transpose_msg_vecs16 ( const uint8_t *const *  inputs,
size_t  block_offset,
__m512i  out[16] 
)

Definition at line 1023 of file blake3_avx512.c.

References loadu_512(), and transpose_vecs_512().

Referenced by blake3_hash16_avx512().

◆ transpose_msg_vecs4()

INLINE void transpose_msg_vecs4 ( const uint8_t *const *  inputs,
size_t  block_offset,
__m128i  out[16] 
)

Definition at line 452 of file blake3_avx512.c.

References loadu_128(), and transpose_vecs_128().

Referenced by blake3_hash4_avx512().

◆ transpose_msg_vecs8()

INLINE void transpose_msg_vecs8 ( const uint8_t *const *  inputs,
size_t  block_offset,
__m256i  out[16] 
)

Definition at line 709 of file blake3_avx512.c.

References loadu_256(), and transpose_vecs_256().

Referenced by blake3_hash8_avx512().

◆ transpose_vecs_128()

INLINE void transpose_vecs_128 ( __m128i  vecs[4])

Definition at line 431 of file blake3_avx512.c.

Referenced by blake3_hash4_avx512(), and transpose_msg_vecs4().

◆ transpose_vecs_256()

INLINE void transpose_vecs_256 ( __m256i  vecs[8])

Definition at line 675 of file blake3_avx512.c.

Referenced by blake3_hash8_avx512(), and transpose_msg_vecs8().

◆ transpose_vecs_512()

INLINE void transpose_vecs_512 ( __m512i  vecs[16])

Definition at line 941 of file blake3_avx512.c.

References unpack_hi_128(), and unpack_lo_128().

Referenced by blake3_hash16_avx512(), and transpose_msg_vecs16().

◆ undiagonalize()

INLINE void undiagonalize ( __m128i *  row0,
__m128i *  row2,
__m128i *  row3 
)

Definition at line 110 of file blake3_avx512.c.

Referenced by compress_pre().

◆ unpack_hi_128()

INLINE __m512i unpack_hi_128 ( __m512i  a,
__m512i  b 
)

Definition at line 937 of file blake3_avx512.c.

References HI_IMM8.

Referenced by transpose_vecs_512().

◆ unpack_lo_128()

INLINE __m512i unpack_lo_128 ( __m512i  a,
__m512i  b 
)

Definition at line 930 of file blake3_avx512.c.

References LO_IMM8.

Referenced by transpose_vecs_512().

◆ xor_128()

INLINE __m128i xor_128 ( __m128i  a,
__m128i  b 
)

◆ xor_256()

INLINE __m256i xor_256 ( __m256i  a,
__m256i  b 
)

Definition at line 37 of file blake3_avx512.c.

Referenced by blake3_hash8_avx512(), and round_fn8().

◆ xor_512()

INLINE __m512i xor_512 ( __m512i  a,
__m512i  b 
)

Definition at line 39 of file blake3_avx512.c.

Referenced by blake3_hash16_avx512(), and round_fn16().