LLVM  16.0.0git
Macros | Functions
blake3_avx512.c File Reference
#include "blake3_impl.h"
#include <immintrin.h>
Include dependency graph for blake3_avx512.c:

Go to the source code of this file.

Macros

#define _mm_shuffle_ps2(a, b, c)
 
#define LO_IMM8   0x88
 
#define HI_IMM8   0xdd
 

Functions

INLINE __m128i loadu_128 (const uint8_t src[16])
 
INLINE __m256i loadu_256 (const uint8_t src[32])
 
INLINE __m512i loadu_512 (const uint8_t src[64])
 
INLINE void storeu_128 (__m128i src, uint8_t dest[16])
 
INLINE void storeu_256 (__m256i src, uint8_t dest[16])
 
INLINE __m128i add_128 (__m128i a, __m128i b)
 
INLINE __m256i add_256 (__m256i a, __m256i b)
 
INLINE __m512i add_512 (__m512i a, __m512i b)
 
INLINE __m128i xor_128 (__m128i a, __m128i b)
 
INLINE __m256i xor_256 (__m256i a, __m256i b)
 
INLINE __m512i xor_512 (__m512i a, __m512i b)
 
INLINE __m128i set1_128 (uint32_t x)
 
INLINE __m256i set1_256 (uint32_t x)
 
INLINE __m512i set1_512 (uint32_t x)
 
INLINE __m128i set4 (uint32_t a, uint32_t b, uint32_t c, uint32_t d)
 
INLINE __m128i rot16_128 (__m128i x)
 
INLINE __m256i rot16_256 (__m256i x)
 
INLINE __m512i rot16_512 (__m512i x)
 
INLINE __m128i rot12_128 (__m128i x)
 
INLINE __m256i rot12_256 (__m256i x)
 
INLINE __m512i rot12_512 (__m512i x)
 
INLINE __m128i rot8_128 (__m128i x)
 
INLINE __m256i rot8_256 (__m256i x)
 
INLINE __m512i rot8_512 (__m512i x)
 
INLINE __m128i rot7_128 (__m128i x)
 
INLINE __m256i rot7_256 (__m256i x)
 
INLINE __m512i rot7_512 (__m512i x)
 
INLINE void g1 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
 
INLINE void g2 (__m128i *row0, __m128i *row1, __m128i *row2, __m128i *row3, __m128i m)
 
INLINE void diagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
 
INLINE void undiagonalize (__m128i *row0, __m128i *row2, __m128i *row3)
 
INLINE void compress_pre (__m128i rows[4], const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
 
void blake3_compress_xof_avx512 (const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
 
void blake3_compress_in_place_avx512 (uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
 
INLINE void round_fn4 (__m128i v[16], __m128i m[16], size_t r)
 
INLINE void transpose_vecs_128 (__m128i vecs[4])
 
INLINE void transpose_msg_vecs4 (const uint8_t *const *inputs, size_t block_offset, __m128i out[16])
 
INLINE void load_counters4 (uint64_t counter, bool increment_counter, __m128i *out_lo, __m128i *out_hi)
 
static void blake3_hash4_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 
INLINE void round_fn8 (__m256i v[16], __m256i m[16], size_t r)
 
INLINE void transpose_vecs_256 (__m256i vecs[8])
 
INLINE void transpose_msg_vecs8 (const uint8_t *const *inputs, size_t block_offset, __m256i out[16])
 
INLINE void load_counters8 (uint64_t counter, bool increment_counter, __m256i *out_lo, __m256i *out_hi)
 
static void blake3_hash8_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 
INLINE void round_fn16 (__m512i v[16], __m512i m[16], size_t r)
 
INLINE __m512i unpack_lo_128 (__m512i a, __m512i b)
 
INLINE __m512i unpack_hi_128 (__m512i a, __m512i b)
 
INLINE void transpose_vecs_512 (__m512i vecs[16])
 
INLINE void transpose_msg_vecs16 (const uint8_t *const *inputs, size_t block_offset, __m512i out[16])
 
INLINE void load_counters16 (uint64_t counter, bool increment_counter, __m512i *out_lo, __m512i *out_hi)
 
static void blake3_hash16_avx512 (const uint8_t *const *inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 
INLINE void hash_one_avx512 (const uint8_t *input, size_t blocks, const uint32_t key[8], uint64_t counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
 
void blake3_hash_many_avx512 (const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
 

Macro Definition Documentation

◆ _mm_shuffle_ps2

#define _mm_shuffle_ps2 (   a,
  b,
  c 
)
Value:
(_mm_castps_si128( \
_mm_shuffle_ps(_mm_castsi128_ps(a), _mm_castsi128_ps(b), (c))))

Definition at line 5 of file blake3_avx512.c.

◆ HI_IMM8

#define HI_IMM8   0xdd

Definition at line 935 of file blake3_avx512.c.

◆ LO_IMM8

#define LO_IMM8   0x88

Definition at line 928 of file blake3_avx512.c.

Function Documentation

◆ add_128()

INLINE __m128i add_128 ( __m128i  a,
__m128i  b 
)

Definition at line 29 of file blake3_avx512.c.

References b.

Referenced by g1(), g2(), and round_fn4().

◆ add_256()

INLINE __m256i add_256 ( __m256i  a,
__m256i  b 
)

Definition at line 31 of file blake3_avx512.c.

References b.

Referenced by round_fn8().

◆ add_512()

INLINE __m512i add_512 ( __m512i  a,
__m512i  b 
)

Definition at line 33 of file blake3_avx512.c.

References b.

Referenced by round_fn16().

◆ blake3_compress_in_place_avx512()

void blake3_compress_in_place_avx512 ( uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags 
)

Definition at line 299 of file blake3_avx512.c.

References block, compress_pre(), storeu_128(), and xor_128().

Referenced by blake3_compress_in_place(), and hash_one_avx512().

◆ blake3_compress_xof_avx512()

void blake3_compress_xof_avx512 ( const uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags,
uint8_t  out[64] 
)

Definition at line 287 of file blake3_avx512.c.

References block, compress_pre(), loadu_128(), storeu_128(), and xor_128().

Referenced by blake3_compress_xof().

◆ blake3_hash16_avx512()

static void blake3_hash16_avx512 ( const uint8_t *const *  inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)
static

◆ blake3_hash4_avx512()

static void blake3_hash4_avx512 ( const uint8_t *const *  inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)
static

◆ blake3_hash8_avx512()

static void blake3_hash8_avx512 ( const uint8_t *const *  inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)
static

◆ blake3_hash_many_avx512()

void blake3_hash_many_avx512 ( const uint8_t *const *  inputs,
size_t  num_inputs,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
bool  increment_counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t *  out 
)

◆ compress_pre()

INLINE void compress_pre ( __m128i  rows[4],
const uint32_t  cv[8],
const uint8_t  block[BLAKE3_BLOCK_LEN],
uint8_t  block_len,
uint64_t  counter,
uint8_t  flags 
)

◆ diagonalize()

INLINE void diagonalize ( __m128i *  row0,
__m128i *  row2,
__m128i *  row3 
)

Definition at line 104 of file blake3_avx512.c.

Referenced by compress_pre().

◆ g1()

INLINE void g1 ( __m128i *  row0,
__m128i *  row1,
__m128i *  row2,
__m128i *  row3,
__m128i  m 
)

Definition at line 81 of file blake3_avx512.c.

References add_128(), rot12_128(), rot16_128(), and xor_128().

Referenced by compress_pre().

◆ g2()

INLINE void g2 ( __m128i *  row0,
__m128i *  row1,
__m128i *  row2,
__m128i *  row3,
__m128i  m 
)

Definition at line 91 of file blake3_avx512.c.

References add_128(), rot7_128(), rot8_128(), and xor_128().

Referenced by compress_pre().

◆ hash_one_avx512()

INLINE void hash_one_avx512 ( const uint8_t *  input,
size_t  blocks,
const uint32_t  key[8],
uint64_t  counter,
uint8_t  flags,
uint8_t  flags_start,
uint8_t  flags_end,
uint8_t  out[BLAKE3_OUT_LEN] 
)

◆ load_counters16()

INLINE void load_counters16 ( uint64_t  counter,
bool  increment_counter,
__m512i *  out_lo,
__m512i *  out_hi 
)

Definition at line 1047 of file blake3_avx512.c.

References h, and l.

Referenced by blake3_hash16_avx512().

◆ load_counters4()

INLINE void load_counters4 ( uint64_t  counter,
bool  increment_counter,
__m128i *  out_lo,
__m128i *  out_hi 
)

Definition at line 479 of file blake3_avx512.c.

Referenced by blake3_hash4_avx512().

◆ load_counters8()

INLINE void load_counters8 ( uint64_t  counter,
bool  increment_counter,
__m256i *  out_lo,
__m256i *  out_hi 
)

Definition at line 734 of file blake3_avx512.c.

Referenced by blake3_hash8_avx512().

◆ loadu_128()

INLINE __m128i loadu_128 ( const uint8_t  src[16])

Definition at line 9 of file blake3_avx512.c.

Referenced by blake3_compress_xof_avx512(), compress_pre(), and transpose_msg_vecs4().

◆ loadu_256()

INLINE __m256i loadu_256 ( const uint8_t  src[32])

Definition at line 13 of file blake3_avx512.c.

Referenced by transpose_msg_vecs8().

◆ loadu_512()

INLINE __m512i loadu_512 ( const uint8_t  src[64])

Definition at line 17 of file blake3_avx512.c.

Referenced by transpose_msg_vecs16().

◆ rot12_128()

INLINE __m128i rot12_128 ( __m128i  x)

Definition at line 57 of file blake3_avx512.c.

References x.

Referenced by g1(), and round_fn4().

◆ rot12_256()

INLINE __m256i rot12_256 ( __m256i  x)

Definition at line 59 of file blake3_avx512.c.

References x.

Referenced by round_fn8().

◆ rot12_512()

INLINE __m512i rot12_512 ( __m512i  x)

Definition at line 61 of file blake3_avx512.c.

References x.

Referenced by round_fn16().

◆ rot16_128()

INLINE __m128i rot16_128 ( __m128i  x)

Definition at line 51 of file blake3_avx512.c.

References x.

Referenced by g1(), and round_fn4().

◆ rot16_256()

INLINE __m256i rot16_256 ( __m256i  x)

Definition at line 53 of file blake3_avx512.c.

References x.

Referenced by round_fn8().

◆ rot16_512()

INLINE __m512i rot16_512 ( __m512i  x)

Definition at line 55 of file blake3_avx512.c.

References x.

Referenced by round_fn16().

◆ rot7_128()

INLINE __m128i rot7_128 ( __m128i  x)

Definition at line 69 of file blake3_avx512.c.

References x.

Referenced by g2(), and round_fn4().

◆ rot7_256()

INLINE __m256i rot7_256 ( __m256i  x)

Definition at line 71 of file blake3_avx512.c.

References x.

Referenced by round_fn8().

◆ rot7_512()

INLINE __m512i rot7_512 ( __m512i  x)

Definition at line 73 of file blake3_avx512.c.

References x.

Referenced by round_fn16().

◆ rot8_128()

INLINE __m128i rot8_128 ( __m128i  x)

Definition at line 63 of file blake3_avx512.c.

References x.

Referenced by g2(), and round_fn4().

◆ rot8_256()

INLINE __m256i rot8_256 ( __m256i  x)

Definition at line 65 of file blake3_avx512.c.

References x.

Referenced by round_fn8().

◆ rot8_512()

INLINE __m512i rot8_512 ( __m512i  x)

Definition at line 67 of file blake3_avx512.c.

References x.

Referenced by round_fn16().

◆ round_fn16()

INLINE void round_fn16 ( __m512i  v[16],
__m512i  m[16],
size_t  r 
)

Definition at line 811 of file blake3_avx512.c.

References add_512(), MSG_SCHEDULE, rot12_512(), rot16_512(), rot7_512(), rot8_512(), and xor_512().

Referenced by blake3_hash16_avx512().

◆ round_fn4()

INLINE void round_fn4 ( __m128i  v[16],
__m128i  m[16],
size_t  r 
)

Definition at line 315 of file blake3_avx512.c.

References add_128(), MSG_SCHEDULE, rot12_128(), rot16_128(), rot7_128(), rot8_128(), and xor_128().

Referenced by blake3_hash4_avx512().

◆ round_fn8()

INLINE void round_fn8 ( __m256i  v[16],
__m256i  m[16],
size_t  r 
)

Definition at line 559 of file blake3_avx512.c.

References add_256(), MSG_SCHEDULE, rot12_256(), rot16_256(), rot7_256(), rot8_256(), and xor_256().

Referenced by blake3_hash8_avx512().

◆ set1_128()

INLINE __m128i set1_128 ( uint32_t  x)

Definition at line 41 of file blake3_avx512.c.

References x.

Referenced by blake3_hash4_avx512().

◆ set1_256()

INLINE __m256i set1_256 ( uint32_t  x)

Definition at line 43 of file blake3_avx512.c.

References x.

Referenced by blake3_hash8_avx512().

◆ set1_512()

INLINE __m512i set1_512 ( uint32_t  x)

Definition at line 45 of file blake3_avx512.c.

References x.

Referenced by blake3_hash16_avx512().

◆ set4()

INLINE __m128i set4 ( uint32_t  a,
uint32_t  b,
uint32_t  c,
uint32_t  d 
)

Definition at line 47 of file blake3_avx512.c.

References b, c, and d.

Referenced by compress_pre().

◆ storeu_128()

INLINE void storeu_128 ( __m128i  src,
uint8_t  dest[16] 
)

◆ storeu_256()

INLINE void storeu_256 ( __m256i  src,
uint8_t  dest[16] 
)

Definition at line 25 of file blake3_avx512.c.

Referenced by blake3_hash8_avx512().

◆ transpose_msg_vecs16()

INLINE void transpose_msg_vecs16 ( const uint8_t *const *  inputs,
size_t  block_offset,
__m512i  out[16] 
)

Definition at line 1023 of file blake3_avx512.c.

References i, loadu_512(), and transpose_vecs_512().

Referenced by blake3_hash16_avx512().

◆ transpose_msg_vecs4()

INLINE void transpose_msg_vecs4 ( const uint8_t *const *  inputs,
size_t  block_offset,
__m128i  out[16] 
)

Definition at line 452 of file blake3_avx512.c.

References i, loadu_128(), and transpose_vecs_128().

Referenced by blake3_hash4_avx512().

◆ transpose_msg_vecs8()

INLINE void transpose_msg_vecs8 ( const uint8_t *const *  inputs,
size_t  block_offset,
__m256i  out[16] 
)

Definition at line 709 of file blake3_avx512.c.

References i, loadu_256(), and transpose_vecs_256().

Referenced by blake3_hash8_avx512().

◆ transpose_vecs_128()

INLINE void transpose_vecs_128 ( __m128i  vecs[4])

Definition at line 431 of file blake3_avx512.c.

Referenced by blake3_hash4_avx512(), and transpose_msg_vecs4().

◆ transpose_vecs_256()

INLINE void transpose_vecs_256 ( __m256i  vecs[8])

Definition at line 675 of file blake3_avx512.c.

Referenced by blake3_hash8_avx512(), and transpose_msg_vecs8().

◆ transpose_vecs_512()

INLINE void transpose_vecs_512 ( __m512i  vecs[16])

Definition at line 941 of file blake3_avx512.c.

References unpack_hi_128(), and unpack_lo_128().

Referenced by blake3_hash16_avx512(), and transpose_msg_vecs16().

◆ undiagonalize()

INLINE void undiagonalize ( __m128i *  row0,
__m128i *  row2,
__m128i *  row3 
)

Definition at line 110 of file blake3_avx512.c.

Referenced by compress_pre().

◆ unpack_hi_128()

INLINE __m512i unpack_hi_128 ( __m512i  a,
__m512i  b 
)

Definition at line 937 of file blake3_avx512.c.

References b, and HI_IMM8.

Referenced by transpose_vecs_512().

◆ unpack_lo_128()

INLINE __m512i unpack_lo_128 ( __m512i  a,
__m512i  b 
)

Definition at line 930 of file blake3_avx512.c.

References b, and LO_IMM8.

Referenced by transpose_vecs_512().

◆ xor_128()

INLINE __m128i xor_128 ( __m128i  a,
__m128i  b 
)

◆ xor_256()

INLINE __m256i xor_256 ( __m256i  a,
__m256i  b 
)

Definition at line 37 of file blake3_avx512.c.

References b.

Referenced by blake3_hash8_avx512(), and round_fn8().

◆ xor_512()

INLINE __m512i xor_512 ( __m512i  a,
__m512i  b 
)

Definition at line 39 of file blake3_avx512.c.

References b.

Referenced by blake3_hash16_avx512(), and round_fn16().

a
=0.0 ? 0.0 :(a > 0.0 ? 1.0 :-1.0) a
Definition: README.txt:489
b
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int b
Definition: README.txt:418
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418