LLVM 20.0.0git
Macros | Functions
blake3_sse41.c File Reference
#include "blake3_impl.h"
#include <immintrin.h>

Go to the source code of this file.

Macros

#define DEGREE   4
 
#define _mm_shuffle_ps2(a, b, c)
 

Functions

INLINE __m128i loadu (const uint8_t src[16])
 
INLINE void storeu (__m128i src, uint8_t dest[16])
 
INLINE __m128i addv (__m128i a, __m128i b)
 
INLINE __m128i xorv (__m128i a, __m128i b)
 
INLINE __m128i set1 (uint32_t x)
 
INLINE __m128i set4 (uint32_t a, uint32_t b, uint32_t c, uint32_t d)
 
INLINE __m128i rot16 (__m128i x)
 
INLINE __m128i rot12 (__m128i x)
 
INLINE __m128i rot8 (__m128i x)
 
INLINE __m128i rot7 (__m128i x)
 
INLINE void g1 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
 
INLINE void g2 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
 
INLINE void diagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
 
INLINE void undiagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
 
INLINE void compress_pre (__m128i rows[4], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
 
void blake3_compress_in_place_sse41 (uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
 
void blake3_compress_xof_sse41 (const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
 
INLINE void round_fn (__m128i v[16], __m128i m[16], size_t r)
 
INLINE void transpose_vecs (__m128i vecs[DEGREE])
 
INLINE void transpose_msg_vecs (const uint8_t *const *inputs, size_t block_offset, __m128i out[16])
 
INLINE void load_counters (uint64_t counter, bool increment_counter, __m128i *out_lo, __m128i *out_hi)
 
static void blake3_hash4_sse41 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 
INLINE void hash_one_sse41 (const uint8_t *input, size_t blocks, const uint32_t key[8], uint64_t counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
 
void blake3_hash_many_sse41 (const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 

Macro Definition Documentation

◆ _mm_shuffle_ps2

#define _mm_shuffle_ps2 (   a,
  b,
  c 
)
Value:
(_mm_castps_si128( \
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))

Definition at line 7 of file blake3_sse41.c.

◆ DEGREE

#define DEGREE   4

Definition at line 5 of file blake3_sse41.c.

Function Documentation

◆ addv()

INLINE __m128i addv ( __m128i  a,
__m128i  b 
)

Definition at line 19 of file blake3_sse41.c.

Referenced by g1(), g2(), and round_fn().

◆ blake3_compress_in_place_sse41()

void blake3_compress_in_place_sse41 ( uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags 
)

Definition at line 254 of file blake3_sse41.c.

References block, compress_pre(), storeu(), and xorv().

◆ blake3_compress_xof_sse41()

void blake3_compress_xof_sse41 ( const uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags,
uint8_t  out[64] 
)

Definition at line 264 of file blake3_sse41.c.

References block, compress_pre(), loadu(), storeu(), and xorv().

◆ blake3_hash4_sse41()

static void blake3_hash4_sse41 ( const uint8_t *const *  inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)
static

◆ blake3_hash_many_sse41()

void blake3_hash_many_sse41 ( const uint8_t *const *  inputs,
size_t  num_inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)

Definition at line 535 of file blake3_sse41.c.

References blake3_hash4_sse41(), BLAKE3_OUT_LEN, blocks, DEGREE, and hash_one_sse41().

◆ compress_pre()

INLINE void compress_pre ( __m128i  rows[4],
const uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags 
)

◆ diagonalize()

INLINE void diagonalize ( __m128i *  row0,
__m128i *  row2,
__m128i *  row3 
)

Definition at line 71 of file blake3_sse41.c.

Referenced by compress_pre().

◆ g1()

INLINE void g1 ( __m128i *  row0,
__m128i *  row1,
__m128i *  row2,
__m128i *  row3,
__m128i  m 
)

Definition at line 48 of file blake3_sse41.c.

References addv(), rot12(), rot16(), and xorv().

Referenced by compress_pre().

◆ g2()

INLINE void g2 ( __m128i *  row0,
__m128i *  row1,
__m128i *  row2,
__m128i *  row3,
__m128i  m 
)

Definition at line 58 of file blake3_sse41.c.

References addv(), rot7(), rot8(), and xorv().

Referenced by compress_pre().

◆ hash_one_sse41()

INLINE void hash_one_sse41 ( const uint8_t *  input,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t  out[BLAKE3_OUT_LEN] 
)

◆ load_counters()

INLINE void load_counters ( uint64_t  counter,
bool  increment_counter,
__m128i *  out_lo,
__m128i *  out_hi 
)

Definition at line 440 of file blake3_sse41.c.

Referenced by blake3_hash4_sse41().

◆ loadu()

INLINE __m128i loadu ( const uint8_t  src[16])

Definition at line 11 of file blake3_sse41.c.

Referenced by blake3_compress_xof_sse41(), compress_pre(), and transpose_msg_vecs().

◆ rot12()

INLINE __m128i rot12 ( __m128i  x)

Definition at line 35 of file blake3_sse41.c.

References xorv().

Referenced by g1(), and round_fn().

◆ rot16()

INLINE __m128i rot16 ( __m128i  x)

Definition at line 30 of file blake3_sse41.c.

Referenced by g1(), and round_fn().

◆ rot7()

INLINE __m128i rot7 ( __m128i  x)

Definition at line 44 of file blake3_sse41.c.

References xorv().

Referenced by g2(), and round_fn().

◆ rot8()

INLINE __m128i rot8 ( __m128i  x)

Definition at line 39 of file blake3_sse41.c.

Referenced by g2(), and round_fn().

◆ round_fn()

INLINE void round_fn ( __m128i  v[16],
__m128i  m[16],
size_t  r 
)

Definition at line 276 of file blake3_sse41.c.

References addv(), MSG_SCHEDULE, rot12(), rot16(), rot7(), rot8(), and xorv().

Referenced by blake3_hash4_sse41().

◆ set1()

INLINE __m128i set1 ( uint32_t  x)

Definition at line 24 of file blake3_sse41.c.

Referenced by blake3_hash4_sse41().

◆ set4()

INLINE __m128i set4 ( uint32_t  a,
uint32_t  b,
uint32_t  c,
uint32_t  d 
)

Definition at line 26 of file blake3_sse41.c.

Referenced by compress_pre().

◆ storeu()

INLINE void storeu ( __m128i  src,
uint8_t  dest[16] 
)

◆ transpose_msg_vecs()

INLINE void transpose_msg_vecs ( const uint8_t *const *  inputs,
size_t  block_offset,
__m128i  out[16] 
)

Definition at line 413 of file blake3_sse41.c.

References loadu(), and transpose_vecs().

Referenced by blake3_hash4_sse41().

◆ transpose_vecs()

INLINE void transpose_vecs ( __m128i  vecs[DEGREE])

Definition at line 392 of file blake3_sse41.c.

Referenced by blake3_hash4_sse41(), and transpose_msg_vecs().

◆ undiagonalize()

INLINE void undiagonalize ( __m128i *  row0,
__m128i *  row2,
__m128i *  row3 
)

Definition at line 77 of file blake3_sse41.c.

Referenced by compress_pre().

◆ xorv()

INLINE __m128i xorv ( __m128i  a,
__m128i  b 
)