20 self->chunk_counter = 0;
23 self->blocks_compressed = 0;
30 self->chunk_counter = chunk_counter;
31 self->blocks_compressed = 0;
38 ((size_t)self->buf_len);
42 const uint8_t *input,
size_t input_len) {
44 if (take > input_len) {
47 uint8_t *dest = self->buf + ((size_t)self->buf_len);
48 memcpy(dest, input, take);
49 self->buf_len += (uint8_t)take;
54 if (self->blocks_compressed == 0) {
90 memcpy(cv_words, self->
input_cv, 32);
98 uint64_t output_block_counter = seek / 64;
99 size_t offset_within_block = seek % 64;
100 uint8_t wide_buf[64];
101 while (out_len > 0) {
103 output_block_counter, self->
flags |
ROOT, wide_buf);
104 size_t available_bytes = 64 - offset_within_block;
106 if (out_len > available_bytes) {
107 memcpy_len = available_bytes;
109 memcpy_len = out_len;
111 memcpy(out, wide_buf + offset_within_block, memcpy_len);
113 out_len -= memcpy_len;
114 output_block_counter += 1;
115 offset_within_block = 0;
121 if (self->buf_len > 0) {
129 self->blocks_compressed += 1;
139 self->blocks_compressed += 1;
148 uint8_t block_flags =
150 return make_output(self->cv, self->buf, self->buf_len, self->chunk_counter,
155 const uint32_t key[8], uint8_t flags) {
175 uint64_t chunk_counter, uint8_t flags,
177#if defined(BLAKE3_TESTING)
183 size_t input_position = 0;
184 size_t chunks_array_len = 0;
186 chunks_array[chunks_array_len] = &input[input_position];
188 chunks_array_len += 1;
197 if (input_len > input_position) {
201 chunk_state.chunk_counter = counter;
203 input_len - input_position);
206 return chunks_array_len + 1;
208 return chunks_array_len;
218 size_t num_chaining_values,
219 const uint32_t key[8], uint8_t flags,
221#if defined(BLAKE3_TESTING)
222 assert(2 <= num_chaining_values);
227 size_t parents_array_len = 0;
228 while (num_chaining_values - (2 * parents_array_len) >= 2) {
229 parents_array[parents_array_len] =
231 parents_array_len += 1;
242 if (num_chaining_values > 2 * parents_array_len) {
246 return parents_array_len + 1;
248 return parents_array_len;
273 uint8_t flags, uint8_t *out) {
287 size_t left_input_len =
left_len(input_len);
288 size_t right_input_len = input_len - left_input_len;
289 const uint8_t *right_input = &input[left_input_len];
310 chunk_counter, flags, cv_array);
312 right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);
323 size_t num_chaining_values = left_n + right_n;
339 const uint8_t *input,
size_t input_len,
const uint32_t key[8],
341#if defined(BLAKE3_TESTING)
347 chunk_counter, flags, cv_array);
371 self->cv_stack_len = 0;
384 size_t context_len) {
410 size_t post_merge_stack_len = (size_t)
popcnt(total_len);
411 while (self->cv_stack_len > post_merge_stack_len) {
412 uint8_t *parent_node =
416 self->cv_stack_len -= 1;
455 memcpy(&self->cv_stack[self->cv_stack_len *
BLAKE3_OUT_LEN], new_cv,
457 self->cv_stack_len += 1;
466 if (input_len == 0) {
470 const uint8_t *input_bytes = (
const uint8_t *)input;
476 if (take > input_len) {
486 uint8_t chunk_cv[32];
526 while ((((
uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
535 chunk_state.chunk_counter = self->chunk.chunk_counter;
546 self->chunk.chunk_counter,
547 self->chunk.flags, cv_pair);
550 self->chunk.chunk_counter + (subtree_chunks / 2));
552 self->chunk.chunk_counter += subtree_chunks;
553 input_bytes += subtree_len;
554 input_len -= subtree_len;
572#if LLVM_MEMORY_SANITIZER_BUILD
579 uint8_t *out,
size_t out_len) {
589 if (self->cv_stack_len == 0) {
602 size_t cvs_remaining;
604 cvs_remaining = self->cv_stack_len;
608 cvs_remaining = self->cv_stack_len - 2;
609 output =
parent_output(&self->cv_stack[cvs_remaining * 32], self->key,
612 while (cvs_remaining > 0) {
615 memcpy(parent_block, &self->cv_stack[cvs_remaining * 32], 32);
617 output =
parent_output(parent_block, self->key, self->chunk.flags);
624 self->cv_stack_len = 0;
#define __msan_unpoison(p, size)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
unify loop Fixup each natural loop to have a single exit block
INLINE void output_root_bytes(const output_t *self, uint64_t seek, uint8_t *out, size_t out_len)
INLINE output_t make_output(const uint32_t input_cv[8], const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, uint64_t counter, uint8_t flags)
INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input, size_t input_len)
INLINE size_t chunk_state_len(const blake3_chunk_state *self)
void llvm_blake3_hasher_init(blake3_hasher *self)
INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8], uint8_t flags)
void llvm_blake3_hasher_reset(blake3_hasher *self)
INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8], uint8_t flags)
INLINE size_t chunk_state_fill_buf(blake3_chunk_state *self, const uint8_t *input, size_t input_len)
INLINE size_t compress_chunks_parallel(const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t *out)
INLINE void hasher_merge_cv_stack(blake3_hasher *self, uint64_t total_len)
void llvm_blake3_hasher_update(blake3_hasher *self, const void *input, size_t input_len)
void llvm_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out, size_t out_len)
static size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t *out)
INLINE uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state *self)
INLINE void output_chaining_value(const output_t *self, uint8_t cv[32])
INLINE output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN], const uint32_t key[8], uint8_t flags)
void llvm_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek, uint8_t *out, size_t out_len)
INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values, size_t num_chaining_values, const uint32_t key[8], uint8_t flags, uint8_t *out)
const char * llvm_blake3_version(void)
INLINE void compress_subtree_to_parent_node(const uint8_t *input, size_t input_len, const uint32_t key[8], uint64_t chunk_counter, uint8_t flags, uint8_t out[2 *BLAKE3_OUT_LEN])
INLINE void chunk_state_reset(blake3_chunk_state *self, const uint32_t key[8], uint64_t chunk_counter)
void llvm_blake3_hasher_init_keyed(blake3_hasher *self, const uint8_t key[BLAKE3_KEY_LEN])
INLINE output_t chunk_state_output(const blake3_chunk_state *self)
void llvm_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context, size_t context_len)
void llvm_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context)
INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN], uint64_t chunk_counter)
INLINE size_t left_len(size_t content_len)
INLINE unsigned int popcnt(uint64_t x)
static const uint32_t IV[8]
INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], uint32_t key_words[8])
#define MAX_SIMD_DEGREE_OR_2
INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8])
INLINE uint64_t round_down_to_power_of_2(uint64_t x)
#define blake3_compress_xof
#define blake3_simd_degree
#define blake3_compress_in_place
#define BLAKE3_VERSION_STRING
#define blake3_chunk_state
uint8_t block[BLAKE3_BLOCK_LEN]