50#if !defined(LLVM_XXH_USE_NEON)
51#if (defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) && \
52 !defined(__ARM_BIG_ENDIAN)
53#define LLVM_XXH_USE_NEON 1
55#define LLVM_XXH_USE_NEON 0
64using namespace support;
67 return (
X << R) | (
X >> (64 - R));
104 size_t Len = Data.size();
106 const unsigned char *
P = Data.bytes_begin();
107 const unsigned char *
const BEnd = Data.bytes_end();
111 const unsigned char *
const Limit = BEnd - 32;
126 }
while (
P <= Limit);
140 while (
reinterpret_cast<uintptr_t
>(
P) + 8 <=
141 reinterpret_cast<uintptr_t
>(BEnd)) {
148 if (
reinterpret_cast<uintptr_t
>(
P) + 4 <=
reinterpret_cast<uintptr_t
>(BEnd)) {
164 return xxHash64({(
const char *)Data.data(), Data.size()});
173 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
174 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
175 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
176 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
177 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
178 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
179 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
180 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
181 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
182 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
183 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
184 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
193#if defined(__SIZEOF_INT128__) || \
194 (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
195 __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs;
200 const uint64_t lo_lo = (lhs & 0xFFFFFFFF) * (rhs & 0xFFFFFFFF);
201 const uint64_t hi_lo = (lhs >> 32) * (rhs & 0xFFFFFFFF);
202 const uint64_t lo_hi = (lhs & 0xFFFFFFFF) * (rhs >> 32);
203 const uint64_t hi_hi = (lhs >> 32) * (rhs >> 32);
206 const uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
207 const uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
208 const uint64_t lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
210 return upper ^ lower;
228 const uint8_t c2 = input[len >> 1];
229 const uint8_t c3 = input[len - 1];
252 return acc ^ (acc >> 28);
299 acc_end =
XXH3_mix16B(input + len - 16, secret + 16, seed);
302 acc_end +=
XXH3_mix16B(input + len - 32, secret + 48, seed);
305 acc_end +=
XXH3_mix16B(input + len - 48, secret + 80, seed);
308 acc_end +=
XXH3_mix16B(input + len - 64, secret + 112, seed);
323 const unsigned nbRounds = len / 16;
324 for (
unsigned i = 0; i < 8; ++i)
325 acc +=
XXH3_mix16B(input + 16 * i, secret + 16 * i, seed);
328 for (
unsigned i = 8; i < nbRounds; ++i) {
341#define XXH3_accumulate_512 XXH3_accumulate_512_neon
342#define XXH3_scrambleAcc XXH3_scrambleAcc_neon
352#if defined(__GNUC__) || defined(__clang__)
353#define XXH_ALIASING __attribute__((__may_alias__))
358typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
361 return vreinterpretq_u64_u8(vld1q_u8((
uint8_t const *)ptr));
365static void XXH3_accumulate_512_neon(
uint64_t *acc,
const uint8_t *input,
367 xxh_aliasing_uint64x2_t *
const xacc = (xxh_aliasing_uint64x2_t *)acc;
370#pragma clang loop unroll(full)
372 for (
size_t i = 0; i <
XXH_ACC_NB / 2; i += 2) {
374 uint64x2_t data_vec_1 = XXH_vld1q_u64(input + (i * 16));
375 uint64x2_t data_vec_2 = XXH_vld1q_u64(input + ((i + 1) * 16));
378 uint64x2_t key_vec_1 = XXH_vld1q_u64(secret + (i * 16));
379 uint64x2_t key_vec_2 = XXH_vld1q_u64(secret + ((i + 1) * 16));
382 uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
383 uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
386 uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
387 uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
402 uint32x4x2_t unzipped = vuzpq_u32(vreinterpretq_u32_u64(data_key_1),
403 vreinterpretq_u32_u64(data_key_2));
406 uint32x4_t data_key_lo = unzipped.val[0];
408 uint32x4_t data_key_hi = unzipped.val[1];
418 uint64x2_t sum_1 = vmlal_u32(data_swap_1, vget_low_u32(data_key_lo),
419 vget_low_u32(data_key_hi));
420 uint64x2_t sum_2 = vmlal_u32(data_swap_2, vget_high_u32(data_key_lo),
421 vget_high_u32(data_key_hi));
424 xacc[i] = vaddq_u64(xacc[i], sum_1);
425 xacc[i + 1] = vaddq_u64(xacc[i + 1], sum_2);
430static void XXH3_scrambleAcc_neon(
uint64_t *acc,
const uint8_t *secret) {
431 xxh_aliasing_uint64x2_t *
const xacc = (xxh_aliasing_uint64x2_t *)acc;
434 uint32x2_t
const kPrimeLo = vdup_n_u32(
PRIME32_1);
436 uint32x4_t
const kPrimeHi =
441 uint64x2_t acc_vec = XXH_vld1q_u64(acc + (2 * i));
442 uint64x2_t shifted = vshrq_n_u64(acc_vec, 47);
443 uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
446 uint64x2_t key_vec = XXH_vld1q_u64(secret + (i * 16));
447 uint64x2_t data_key = veorq_u64(data_vec, key_vec);
463 uint32x4_t prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), kPrimeHi);
466 uint32x2_t data_key_lo = vmovn_u64(data_key);
469 xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
474#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
475#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
483 acc[i ^ 1] += data_val;
484 acc[i] +=
uint32_t(data_key) * (data_key >> 32);
491 acc[i] ^= acc[i] >> 47;
500 const uint8_t *secret,
size_t nbStripes) {
501 for (
size_t n = 0; n < nbStripes; ++n) {
515 for (
size_t i = 0; i < 4; ++i)
522 const uint8_t *secret,
size_t secretSize) {
523 const size_t nbStripesPerBlock =
526 const size_t nb_blocks = (len - 1) / block_len;
531 for (
size_t n = 0; n < nb_blocks; ++n) {
532 XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock);
537 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) /
XXH_STRIPE_LEN;
539 XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes);
542 constexpr size_t XXH_SECRET_LASTACC_START = 7;
545 XXH_SECRET_LASTACC_START);
548 constexpr size_t XXH_SECRET_MERGEACCS_START = 11;
554 auto *in = data.
data();
555 size_t len = data.
size();
595#if __has_builtin(__builtin_rotateleft32) && \
596 __has_builtin(__builtin_rotateleft64)
597#define XXH_rotl32 __builtin_rotateleft32
598#define XXH_rotl64 __builtin_rotateleft64
601#elif defined(_MSC_VER)
602#define XXH_rotl32(x, r) _rotl(x, r)
603#define XXH_rotl64(x, r) _rotl64(x, r)
605#define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
606#define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
609#define XXH_mult32to64(x, y) ((uint64_t)(uint32_t)(x) * (uint64_t)(uint32_t)(y))
636#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) && \
637 defined(__SIZEOF_INT128__) || \
638 (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
640 __uint128_t
const product = (__uint128_t)lhs * (__uint128_t)rhs;
654#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
657#pragma intrinsic(_umul128)
660 uint64_t const product_low = _umul128(lhs, rhs, &product_high);
662 r128.
low64 = product_low;
663 r128.
high64 = product_high;
671#elif defined(_M_ARM64) || defined(_M_ARM64EC)
674#pragma intrinsic(__umulh)
677 r128.
low64 = lhs * rhs;
678 r128.
high64 = __umulh(lhs, rhs);
732 uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
733 uint64_t const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
734 uint64_t const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
746 return v64 ^ (v64 >> shift);
759 uint8_t const c2 = input[len >> 1];
760 uint8_t const c3 = input[len - 1];
785 uint64_t const keyed = input_64 ^ bitflip;
817 input_hi ^= bitfliph;
825 if (
sizeof(
void *) <
sizeof(
uint64_t)) {
832 m128.
high64 += (input_hi & 0xFFFFFFFF00000000ULL) +
922 XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed);
924 acc =
XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed);
926 acc =
XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed);
928 acc =
XXH128_mix32B(acc, input, input + len - 16, secret, seed);
953 for (i = 32; i < 160; i += 32) {
954 acc =
XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + i - 32,
964 for (i = 160; i <= len; i += 32) {
986 const size_t nbStripesPerBlock =
989 const size_t nb_blocks = (len - 1) / block_len;
995 for (
size_t n = 0; n < nb_blocks; ++n) {
996 XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock);
1001 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) /
XXH_STRIPE_LEN;
1003 XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes);
1006 constexpr size_t XXH_SECRET_LASTACC_START = 7;
1009 XXH_SECRET_LASTACC_START);
1012 static_assert(
sizeof(acc) == 64);
1014 constexpr size_t XXH_SECRET_MERGEACCS_START = 11;
1018 acc, secret + secretSize -
sizeof(acc) - XXH_SECRET_MERGEACCS_START,
1024 size_t len = data.
size();
#define LLVM_ATTRIBUTE_ALWAYS_INLINE
LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do so, mark a method "always...
#define LLVM_ATTRIBUTE_NOINLINE
LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, mark a method "not for inl...
#define LLVM_LIKELY(EXPR)
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static ManagedStatic< cl::opt< uint64_t >, CreateSeed > Seed
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
StringRef - Represent a constant reference to a string, i.e.
uint64_t read64le(const void *P)
uint32_t read32le(const void *P)
This is an optimization pass for GlobalISel generic memory operations.
uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
constexpr T byteswap(T V) noexcept
Reverses the bytes in the given integer value V.
XXH128_hash_t xxh3_128bits(ArrayRef< uint8_t > data)
XXH3's 128-bit variant.
uint64_t xxHash64(llvm::StringRef Data)
The return value from 128-bit hashes.
uint64_t low64
value & 0xFFFFFFFFFFFFFFFF
uint64_t high64
value >> 64
static uint64_t XXH3_len_9to16_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t const seed)
static uint64_t mergeRound(uint64_t Acc, uint64_t Val)
#define XXH_mult32to64(x, y)
static uint64_t XXH3_len_4to8_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
static LLVM_ATTRIBUTE_NOINLINE uint64_t XXH3_hashLong_64b(const uint8_t *input, size_t len, const uint8_t *secret, size_t secretSize)
static LLVM_ATTRIBUTE_ALWAYS_INLINE void XXH3_accumulate(uint64_t *acc, const uint8_t *input, const uint8_t *secret, size_t nbStripes)
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_1to3_128b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
static uint64_t XXH3_len_1to3_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
static const uint64_t PRIME64_3
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH128_mix32B(XXH128_hash_t acc, const uint8_t *input_1, const uint8_t *input_2, const uint8_t *secret, uint64_t seed)
static uint64_t XXH3_mergeAccs(const uint64_t *acc, const uint8_t *key, uint64_t start)
static uint64_t XXH3_mix16B(const uint8_t *input, uint8_t const *secret, uint64_t seed)
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_17to128_128b(const uint8_t *input, size_t len, const uint8_t *secret, size_t secretSize, uint64_t seed)
static const uint64_t PRIME64_1
static XXH128_hash_t XXH_mult64to128(uint64_t lhs, uint64_t rhs)
Calculates a 64->128-bit long multiply.
static uint64_t XXH3_avalanche(uint64_t hash)
#define XXH3_accumulate_512
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_4to8_128b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
constexpr size_t XXH3_SECRETSIZE_MIN
static LLVM_ATTRIBUTE_NOINLINE XXH128_hash_t XXH3_len_129to240_128b(const uint8_t *input, size_t len, const uint8_t *secret, size_t secretSize, uint64_t seed)
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_9to16_128b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
constexpr uint32_t PRIME32_1
constexpr uint32_t PRIME32_2
static LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_len_0to16_128b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
static const uint64_t PRIME64_2
constexpr size_t XXH3_MIDSIZE_MAX
constexpr uint8_t kSecret[XXH_SECRET_DEFAULT_SIZE]
static LLVM_ATTRIBUTE_ALWAYS_INLINE uint64_t XXH3_len_17to128_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t const seed)
static LLVM_ATTRIBUTE_NOINLINE uint64_t XXH3_len_129to240_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t seed)
constexpr uint32_t PRIME32_3
constexpr size_t XXH_STRIPE_LEN
static const uint64_t PRIME64_4
constexpr size_t XXH_ACC_NB
static const uint64_t PRIME64_5
LLVM_ATTRIBUTE_ALWAYS_INLINE constexpr uint64_t XXH_xorshift64(uint64_t v64, int shift)
Seems to produce slightly better code on GCC for some reason.
LLVM_ATTRIBUTE_ALWAYS_INLINE XXH128_hash_t XXH3_hashLong_128b(const uint8_t *input, size_t len, const uint8_t *secret, size_t secretSize)
static uint64_t rotl64(uint64_t X, size_t R)
static uint64_t XXH64_avalanche(uint64_t hash)
static LLVM_ATTRIBUTE_ALWAYS_INLINE void XXH3_scrambleAcc_scalar(uint64_t *acc, const uint8_t *secret)
constexpr size_t XXH_SECRET_DEFAULT_SIZE
constexpr uint64_t PRIME_MX1
static LLVM_ATTRIBUTE_ALWAYS_INLINE void XXH3_accumulate_512_scalar(uint64_t *acc, const uint8_t *input, const uint8_t *secret)
static uint64_t XXH3_mix2Accs(const uint64_t *acc, const uint8_t *secret)
constexpr size_t XXH3_MIDSIZE_STARTOFFSET
static LLVM_ATTRIBUTE_ALWAYS_INLINE uint64_t XXH3_len_0to16_64b(const uint8_t *input, size_t len, const uint8_t *secret, uint64_t const seed)
static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs)
static uint64_t round(uint64_t Acc, uint64_t Input)
constexpr size_t XXH_SECRET_CONSUME_RATE
constexpr uint64_t PRIME_MX2
constexpr size_t XXH3_MIDSIZE_LASTOFFSET