LLVM  16.0.0git
Macros | Functions
blake3_sse2.c File Reference
#include "blake3_impl.h"
#include <immintrin.h>
Include dependency graph for blake3_sse2.c:

Go to the source code of this file.

Macros

#define DEGREE   4
 
#define _mm_shuffle_ps2(a, b, c)
 

Functions

INLINE __m128i loadu (const uint8_t src[16])
 
INLINE void storeu (__m128i src, uint8_t dest[16])
 
INLINE __m128i addv (__m128i a, __m128i b)
 
INLINE __m128i xorv (__m128i a, __m128i b)
 
INLINE __m128i set1 (uint32_t x)
 
INLINE __m128i set4 (uint32_t a, uint32_t b, uint32_t c, uint32_t d)
 
INLINE __m128i rot16 (__m128i x)
 
INLINE __m128i rot12 (__m128i x)
 
INLINE __m128i rot8 (__m128i x)
 
INLINE __m128i rot7 (__m128i x)
 
INLINE void g1 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
 
INLINE void g2 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
 
INLINE void diagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
 
INLINE void undiagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
 
INLINE __m128i blend_epi16 (__m128i a, __m128i b, const int16_t imm8)
 
INLINE void compress_pre (__m128i rows[4], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
 
void blake3_compress_in_place_sse2 (uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
 
void blake3_compress_xof_sse2 (const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
 
INLINE void round_fn (__m128i v[16], __m128i m[16], size_t r)
 
INLINE void transpose_vecs (__m128i vecs[DEGREE])
 
INLINE void transpose_msg_vecs (const uint8_t *const *inputs, size_t block_offset, __m128i out[16])
 
INLINE void load_counters (uint64_t counter, bool increment_counter, __m128i *out_lo, __m128i *out_hi)
 
static void blake3_hash4_sse2 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 
INLINE void hash_one_sse2 (const uint8_t *input, size_t blocks, const uint32_t key[8], uint64_t counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
 
void blake3_hash_many_sse2 (const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 

Macro Definition Documentation

◆ _mm_shuffle_ps2

#define _mm_shuffle_ps2 (   a,
  b,
  c 
)
Value:
(_mm_castps_si128( \
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))

Definition at line 7 of file blake3_sse2.c.

◆ DEGREE

#define DEGREE   4

Definition at line 5 of file blake3_sse2.c.

Function Documentation

◆ addv()

INLINE __m128i addv ( __m128i  a,
__m128i  b 
)

Definition at line 19 of file blake3_sse2.c.

References b.

Referenced by g1(), g2(), and round_fn().

◆ blake3_compress_in_place_sse2()

void blake3_compress_in_place_sse2 ( uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags 
)

Definition at line 260 of file blake3_sse2.c.

References block, compress_pre(), storeu(), and xorv().

Referenced by blake3_compress_in_place(), and hash_one_sse2().

◆ blake3_compress_xof_sse2()

void blake3_compress_xof_sse2 ( const uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags,
uint8_t  out[64] 
)

Definition at line 270 of file blake3_sse2.c.

References block, compress_pre(), loadu(), storeu(), and xorv().

Referenced by blake3_compress_xof().

◆ blake3_hash4_sse2()

static void blake3_hash4_sse2 ( const uint8_t *const *  inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)
static

◆ blake3_hash_many_sse2()

void blake3_hash_many_sse2 ( const uint8_t *const *  inputs,
size_t  num_inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)

Definition at line 541 of file blake3_sse2.c.

References blake3_hash4_sse2(), BLAKE3_OUT_LEN, DEGREE, and hash_one_sse2().

Referenced by blake3_hash_many().

◆ blend_epi16()

INLINE __m128i blend_epi16 ( __m128i  a,
__m128i  b,
const int16_t  imm8 
)

Definition at line 81 of file blake3_sse2.c.

References b, and bits.

Referenced by compress_pre().

◆ compress_pre()

INLINE void compress_pre ( __m128i  rows[4],
const uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags 
)

◆ diagonalize()

INLINE void diagonalize ( __m128i *  row0,
__m128i *  row2,
__m128i *  row3 
)

Definition at line 69 of file blake3_sse2.c.

Referenced by compress_pre().

◆ g1()

INLINE void g1 ( __m128i *  row0,
__m128i *  row1,
__m128i *  row2,
__m128i *  row3,
__m128i  m 
)

Definition at line 46 of file blake3_sse2.c.

References addv(), rot12(), rot16(), and xorv().

Referenced by compress_pre().

◆ g2()

INLINE void g2 ( __m128i *  row0,
__m128i *  row1,
__m128i *  row2,
__m128i *  row3,
__m128i  m 
)

Definition at line 56 of file blake3_sse2.c.

References addv(), rot7(), rot8(), and xorv().

Referenced by compress_pre().

◆ hash_one_sse2()

INLINE void hash_one_sse2 ( const uint8_t *  input,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t  out[BLAKE3_OUT_LEN] 
)

◆ load_counters()

INLINE void load_counters ( uint64_t  counter,
bool  increment_counter,
__m128i *  out_lo,
__m128i *  out_hi 
)

Definition at line 446 of file blake3_sse2.c.

References h, and l.

Referenced by blake3_hash4_sse2().

◆ loadu()

INLINE __m128i loadu ( const uint8_t  src[16])

Definition at line 11 of file blake3_sse2.c.

Referenced by blake3_compress_xof_sse2(), compress_pre(), and transpose_msg_vecs().

◆ rot12()

INLINE __m128i rot12 ( __m128i  x)

Definition at line 34 of file blake3_sse2.c.

References x, and xorv().

Referenced by g1(), and round_fn().

◆ rot16()

INLINE __m128i rot16 ( __m128i  x)

Definition at line 30 of file blake3_sse2.c.

References x.

Referenced by g1(), and round_fn().

◆ rot7()

INLINE __m128i rot7 ( __m128i  x)

Definition at line 42 of file blake3_sse2.c.

References x, and xorv().

Referenced by g2(), and round_fn().

◆ rot8()

INLINE __m128i rot8 ( __m128i  x)

Definition at line 38 of file blake3_sse2.c.

References x, and xorv().

Referenced by g2(), and round_fn().

◆ round_fn()

INLINE void round_fn ( __m128i  v[16],
__m128i  m[16],
size_t  r 
)

Definition at line 282 of file blake3_sse2.c.

References addv(), MSG_SCHEDULE, rot12(), rot16(), rot7(), rot8(), and xorv().

Referenced by blake3_hash4_sse2().

◆ set1()

INLINE __m128i set1 ( uint32_t  x)

Definition at line 24 of file blake3_sse2.c.

References x.

Referenced by blake3_hash4_sse2().

◆ set4()

INLINE __m128i set4 ( uint32_t  a,
uint32_t  b,
uint32_t  c,
uint32_t  d 
)

Definition at line 26 of file blake3_sse2.c.

References b, c, and d.

Referenced by compress_pre().

◆ storeu()

INLINE void storeu ( __m128i  src,
uint8_t  dest[16] 
)

◆ transpose_msg_vecs()

INLINE void transpose_msg_vecs ( const uint8_t *const *  inputs,
size_t  block_offset,
__m128i  out[16] 
)

Definition at line 419 of file blake3_sse2.c.

References i, loadu(), and transpose_vecs().

Referenced by blake3_hash4_sse2().

◆ transpose_vecs()

INLINE void transpose_vecs ( __m128i  vecs[DEGREE])

Definition at line 398 of file blake3_sse2.c.

Referenced by blake3_hash4_sse2(), and transpose_msg_vecs().

◆ undiagonalize()

INLINE void undiagonalize ( __m128i *  row0,
__m128i *  row2,
__m128i *  row3 
)

Definition at line 75 of file blake3_sse2.c.

Referenced by compress_pre().

◆ xorv()

INLINE __m128i xorv ( __m128i  a,
__m128i  b 
)

Cross-reference tooltips (auto-generated; the identifiers below were linked to unrelated README.txt text rather than to the parameters of xorv()):

a
Excerpt: "=0.0 ? 0.0 :(a > 0.0 ? 1.0 :-1.0) a"
Definition: README.txt:489

b
Excerpt: "the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b"
Definition: README.txt:418

c
Excerpt: "the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c"
Definition: README.txt:418