LLVM  16.0.0git
blake3_impl.h
Go to the documentation of this file.
1 #ifndef BLAKE3_IMPL_H
2 #define BLAKE3_IMPL_H
3 
4 #include <assert.h>
5 #include <stdbool.h>
6 #include <stddef.h>
7 #include <stdint.h>
8 #include <string.h>
9 
10 #include "llvm-c/blake3.h"
11 // For \p LLVM_LIBRARY_VISIBILITY
12 #include "llvm/Support/Compiler.h"
13 
14 // Remove the 'llvm_' prefix for the rest of the internal implementation.
15 #define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING
16 #define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN
17 #define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN
18 #define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN
19 #define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN
20 #define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH
21 #define blake3_hasher llvm_blake3_hasher
22 #define blake3_chunk_state llvm_blake3_chunk_state
23 
// Internal flags. Every enumerator is a distinct single bit, so several flags
// can be combined with bitwise OR into one small integer.
enum blake3_flags {
  CHUNK_START = 1 << 0,
  CHUNK_END = 1 << 1,
  PARENT = 1 << 2,
  ROOT = 1 << 3,
  KEYED_HASH = 1 << 4,
  DERIVE_KEY_CONTEXT = 1 << 5,
  DERIVE_KEY_MATERIAL = 1 << 6,
};
34 
35 // This C implementation tries to support recent versions of GCC, Clang, and
36 // MSVC.
37 #if defined(_MSC_VER)
38 #define INLINE static __forceinline
39 #else
40 #define INLINE static inline __attribute__((always_inline))
41 #endif
42 
43 #if defined(__x86_64__) || defined(_M_X64)
44 #define IS_X86
45 #define IS_X86_64
46 #endif
47 
48 #if defined(__i386__) || defined(_M_IX86)
49 #define IS_X86
50 #define IS_X86_32
51 #endif
52 
53 #if defined(__aarch64__) || defined(_M_ARM64)
54 #define IS_AARCH64
55 #endif
56 
57 #if defined(IS_X86)
58 #if defined(_MSC_VER)
59 #include <intrin.h>
60 #endif
61 #include <immintrin.h>
62 #endif
63 
64 #if !defined(BLAKE3_USE_NEON)
65  // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
66  #if defined(IS_AARCH64)
67  #define BLAKE3_USE_NEON 1
68  #else
69  #define BLAKE3_USE_NEON 0
70  #endif
71 #endif
72 
73 #if defined(IS_X86)
74 #define MAX_SIMD_DEGREE 16
75 #elif BLAKE3_USE_NEON == 1
76 #define MAX_SIMD_DEGREE 4
77 #else
78 #define MAX_SIMD_DEGREE 1
79 #endif
80 
81 // There are some places where we want a static size that's equal to the
82 // MAX_SIMD_DEGREE, but also at least 2.
83 #define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
84 
// Eight constant 32-bit words, laid out four per line.
static const uint32_t IV[8] = {
    0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
    0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL,
};
88 
// Seven rows of sixteen indices. Every row is a permutation of 0..15; row 0
// is the identity ordering.
static const uint8_t MSG_SCHEDULE[7][16] = {
    /* row 0 */ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    /* row 1 */ {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
    /* row 2 */ {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
    /* row 3 */ {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
    /* row 4 */ {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
    /* row 5 */ {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
    /* row 6 */ {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
};
98 
/*
 * Return the zero-based index of the most significant set bit of x.
 * x must be nonzero.
 */
static unsigned int highest_one(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
  /* For nonzero x the leading-zero count is in [0, 63]. */
  return 63u - (unsigned int)__builtin_clzll(x);
#elif defined(_MSC_VER) && defined(IS_X86_64)
  unsigned long bit;
  _BitScanReverse64(&bit, x);
  return bit;
#elif defined(_MSC_VER) && defined(IS_X86_32)
  /* Only a 32-bit scan is used here: scan whichever half holds the top bit. */
  unsigned long bit;
  const unsigned long upper = (unsigned long)(x >> 32);
  if (upper != 0) {
    _BitScanReverse(&bit, upper);
    return 32 + bit;
  }
  _BitScanReverse(&bit, (unsigned long)x);
  return bit;
#else
  /* Binary search: test the upper half of a window that halves each step. */
  unsigned int pos = 0;
  unsigned int step;
  for (step = 32; step != 0; step >>= 1) {
    if ((x >> step) != 0) {
      x >>= step;
      pos += step;
    }
  }
  return pos;
#endif
}
129 
130 // Count the number of 1 bits.
131 INLINE unsigned int popcnt(uint64_t x) {
132 #if defined(__GNUC__) || defined(__clang__)
133  return __builtin_popcountll(x);
134 #else
135  unsigned int count = 0;
136  while (x != 0) {
137  count += 1;
138  x &= x - 1;
139  }
140  return count;
141 #endif
142 }
143 
144 // Largest power of two less than or equal to x. As a special case, returns 1
145 // when x is 0.
147  return 1ULL << highest_one(x | 1);
148 }
149 
150 INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
151 
153  return (uint32_t)(counter >> 32);
154 }
155 
156 INLINE uint32_t load32(const void *src) {
157  const uint8_t *p = (const uint8_t *)src;
158  return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
159  ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
160 }
161 
162 INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
163  uint32_t key_words[8]) {
164  key_words[0] = load32(&key[0 * 4]);
165  key_words[1] = load32(&key[1 * 4]);
166  key_words[2] = load32(&key[2 * 4]);
167  key_words[3] = load32(&key[3 * 4]);
168  key_words[4] = load32(&key[4 * 4]);
169  key_words[5] = load32(&key[5 * 4]);
170  key_words[6] = load32(&key[6 * 4]);
171  key_words[7] = load32(&key[7 * 4]);
172 }
173 
174 INLINE void store32(void *dst, uint32_t w) {
175  uint8_t *p = (uint8_t *)dst;
176  p[0] = (uint8_t)(w >> 0);
177  p[1] = (uint8_t)(w >> 8);
178  p[2] = (uint8_t)(w >> 16);
179  p[3] = (uint8_t)(w >> 24);
180 }
181 
182 INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
183  store32(&bytes_out[0 * 4], cv_words[0]);
184  store32(&bytes_out[1 * 4], cv_words[1]);
185  store32(&bytes_out[2 * 4], cv_words[2]);
186  store32(&bytes_out[3 * 4], cv_words[3]);
187  store32(&bytes_out[4 * 4], cv_words[4]);
188  store32(&bytes_out[5 * 4], cv_words[5]);
189  store32(&bytes_out[6 * 4], cv_words[6]);
190  store32(&bytes_out[7 * 4], cv_words[7]);
191 }
192 
195  const uint8_t block[BLAKE3_BLOCK_LEN],
196  uint8_t block_len, uint64_t counter,
197  uint8_t flags);
198 
200 void blake3_compress_xof(const uint32_t cv[8],
201  const uint8_t block[BLAKE3_BLOCK_LEN],
202  uint8_t block_len, uint64_t counter, uint8_t flags,
203  uint8_t out[64]);
204 
206 void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
207  size_t blocks, const uint32_t key[8], uint64_t counter,
208  bool increment_counter, uint8_t flags,
209  uint8_t flags_start, uint8_t flags_end, uint8_t *out);
210 
212 size_t blake3_simd_degree(void);
213 
214 
215 // Declarations for implementation-specific functions.
218  const uint8_t block[BLAKE3_BLOCK_LEN],
219  uint8_t block_len, uint64_t counter,
220  uint8_t flags);
221 
223 void blake3_compress_xof_portable(const uint32_t cv[8],
224  const uint8_t block[BLAKE3_BLOCK_LEN],
225  uint8_t block_len, uint64_t counter,
226  uint8_t flags, uint8_t out[64]);
227 
229 void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
230  size_t blocks, const uint32_t key[8],
231  uint64_t counter, bool increment_counter,
232  uint8_t flags, uint8_t flags_start,
233  uint8_t flags_end, uint8_t *out);
234 
// x86 SIMD variants. Each instruction-set group can be compiled out by
// defining the matching BLAKE3_NO_* macro.
#if defined(IS_X86)
#if !defined(BLAKE3_NO_SSE2)
LLVM_LIBRARY_VISIBILITY
void blake3_compress_in_place_sse2(uint32_t cv[8],
                                   const uint8_t block[BLAKE3_BLOCK_LEN],
                                   uint8_t block_len, uint64_t counter,
                                   uint8_t flags);
LLVM_LIBRARY_VISIBILITY
void blake3_compress_xof_sse2(const uint32_t cv[8],
                              const uint8_t block[BLAKE3_BLOCK_LEN],
                              uint8_t block_len, uint64_t counter,
                              uint8_t flags, uint8_t out[64]);
LLVM_LIBRARY_VISIBILITY
void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_SSE41)
LLVM_LIBRARY_VISIBILITY
void blake3_compress_in_place_sse41(uint32_t cv[8],
                                    const uint8_t block[BLAKE3_BLOCK_LEN],
                                    uint8_t block_len, uint64_t counter,
                                    uint8_t flags);
LLVM_LIBRARY_VISIBILITY
void blake3_compress_xof_sse41(const uint32_t cv[8],
                               const uint8_t block[BLAKE3_BLOCK_LEN],
                               uint8_t block_len, uint64_t counter,
                               uint8_t flags, uint8_t out[64]);
LLVM_LIBRARY_VISIBILITY
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
                            size_t blocks, const uint32_t key[8],
                            uint64_t counter, bool increment_counter,
                            uint8_t flags, uint8_t flags_start,
                            uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_AVX2)
LLVM_LIBRARY_VISIBILITY
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
#if !defined(BLAKE3_NO_AVX512)
LLVM_LIBRARY_VISIBILITY
void blake3_compress_in_place_avx512(uint32_t cv[8],
                                     const uint8_t block[BLAKE3_BLOCK_LEN],
                                     uint8_t block_len, uint64_t counter,
                                     uint8_t flags);

LLVM_LIBRARY_VISIBILITY
void blake3_compress_xof_avx512(const uint32_t cv[8],
                                const uint8_t block[BLAKE3_BLOCK_LEN],
                                uint8_t block_len, uint64_t counter,
                                uint8_t flags, uint8_t out[64]);

LLVM_LIBRARY_VISIBILITY
void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
                             size_t blocks, const uint32_t key[8],
                             uint64_t counter, bool increment_counter,
                             uint8_t flags, uint8_t flags_start,
                             uint8_t flags_end, uint8_t *out);
#endif
#endif
301 
// NEON variant, declared only when BLAKE3_USE_NEON is 1.
#if BLAKE3_USE_NEON == 1
LLVM_LIBRARY_VISIBILITY
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
                           size_t blocks, const uint32_t key[8],
                           uint64_t counter, bool increment_counter,
                           uint8_t flags, uint8_t flags_start,
                           uint8_t flags_end, uint8_t *out);
#endif
310 
311 
312 #endif /* BLAKE3_IMPL_H */
block
we get the following basic block
Definition: README_ALTIVEC.txt:95
DERIVE_KEY_MATERIAL
@ DERIVE_KEY_MATERIAL
Definition: blake3_impl.h:32
BLAKE3_KEY_LEN
#define BLAKE3_KEY_LEN
Definition: blake3_impl.h:16
counter_high
INLINE uint32_t counter_high(uint64_t counter)
Definition: blake3_impl.h:152
blake3_flags
blake3_flags
Definition: blake3_impl.h:25
blake3_compress_in_place
LLVM_LIBRARY_VISIBILITY void blake3_compress_in_place(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
Definition: blake3_dispatch.c:137
counter_low
INLINE uint32_t counter_low(uint64_t counter)
Definition: blake3_impl.h:150
blake3_compress_xof_sse2
void blake3_compress_xof_sse2(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
Definition: blake3_sse2.c:270
BLAKE3_BLOCK_LEN
#define BLAKE3_BLOCK_LEN
Definition: blake3_impl.h:18
blake3_compress_in_place_portable
LLVM_LIBRARY_VISIBILITY void blake3_compress_in_place_portable(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
Definition: blake3_portable.c:84
blake3_compress_in_place_avx512
void blake3_compress_in_place_avx512(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
Definition: blake3_avx512.c:299
blake3_hash_many
LLVM_LIBRARY_VISIBILITY void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
Definition: blake3_dispatch.c:195
CHUNK_END
@ CHUNK_END
Definition: blake3_impl.h:27
p
the resulting code requires compare and branches when and if * p
Definition: README.txt:396
blake3_simd_degree
LLVM_LIBRARY_VISIBILITY size_t blake3_simd_degree(void)
Definition: blake3_dispatch.c:248
blake3_compress_xof_avx512
void blake3_compress_xof_avx512(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
Definition: blake3_avx512.c:287
blake3_hash_many_sse2
void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
Definition: blake3_sse2.c:541
popcnt
INLINE unsigned int popcnt(uint64_t x)
Definition: blake3_impl.h:131
load32
INLINE uint32_t load32(const void *src)
Definition: blake3_impl.h:156
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
ROOT
@ ROOT
Definition: blake3_impl.h:29
MSG_SCHEDULE
static const uint8_t MSG_SCHEDULE[7][16]
Definition: blake3_impl.h:89
blake3_hash_many_avx512
void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
Definition: blake3_avx512.c:1162
blake3_compress_in_place_sse2
void blake3_compress_in_place_sse2(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
Definition: blake3_sse2.c:260
llvm::count
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1895
index
splat index
Definition: README_ALTIVEC.txt:181
uint64_t
DERIVE_KEY_CONTEXT
@ DERIVE_KEY_CONTEXT
Definition: blake3_impl.h:31
blake3_hash_many_avx2
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
Definition: blake3_avx2.c:303
store32
INLINE void store32(void *dst, uint32_t w)
Definition: blake3_impl.h:174
blake3_hash_many_portable
LLVM_LIBRARY_VISIBILITY void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
Definition: blake3_portable.c:145
highest_one
static unsigned int highest_one(uint64_t x)
Definition: blake3_impl.h:101
uint32_t
Compiler.h
LLVM_LIBRARY_VISIBILITY
#define LLVM_LIBRARY_VISIBILITY
LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked into a shared library,...
Definition: Compiler.h:126
blake3_compress_xof_sse41
void blake3_compress_xof_sse41(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
Definition: blake3_sse41.c:264
load_key_words
INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], uint32_t key_words[8])
Definition: blake3_impl.h:162
PARENT
@ PARENT
Definition: blake3_impl.h:28
blake3.h
x
TODO unsigned x
Definition: README.txt:10
blake3_compress_xof
LLVM_LIBRARY_VISIBILITY void blake3_compress_xof(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
Definition: blake3_dispatch.c:166
blake3_compress_in_place_sse41
void blake3_compress_in_place_sse41(uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
Definition: blake3_sse41.c:254
CHUNK_START
@ CHUNK_START
Definition: blake3_impl.h:26
IV
static const uint32_t IV[8]
Definition: blake3_impl.h:85
round_down_to_power_of_2
INLINE uint64_t round_down_to_power_of_2(uint64_t x)
Definition: blake3_impl.h:146
blake3_compress_xof_portable
LLVM_LIBRARY_VISIBILITY void blake3_compress_xof_portable(const uint32_t cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags, uint8_t out[64])
Definition: blake3_portable.c:100
blake3_hash_many_sse41
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter, uint8_t flags, uint8_t flags_start, uint8_t flags_end, uint8_t *out)
Definition: blake3_sse41.c:535
KEYED_HASH
@ KEYED_HASH
Definition: blake3_impl.h:30
store_cv_words
INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8])
Definition: blake3_impl.h:182
INLINE
#define INLINE
Definition: blake3_impl.h:40