struct XxHash3 [src]
Alias for std.hash.xxhash.XxHash3
Fields
buffered: usize = 0
buffer: [256]u8 = undefined
total_len: usize = 0
accumulator: Accumulator
Members
Source
/// XXH3 64-bit hash. Provides a one-shot API (`hash`) that dispatches on input
/// length, and a streaming API (`init` / `update` / `final`) that buffers input
/// and feeds whole 64-byte stripes to an `Accumulator`.
pub const XxHash3 = struct {
    /// One 64-byte "stripe" viewed as eight u64 lanes, processed SIMD-style.
    const Block = @Vector(8, u64);

    /// The 192-byte built-in secret (`kSecret` in the reference XXH3
    /// implementation). Short-input one-shot paths read it directly;
    /// `Accumulator.init` derives a seed-mixed copy for the long-input path.
    const default_secret: [192]u8 = .{
        0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
        0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
        0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
        0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
        0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
        0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
        0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
        0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
        0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
        0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
        0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
        0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
    };

    // XXH3's two extra 64-bit multiply constants (PRIME_MX1 / PRIME_MX2 in the
    // reference implementation), used by the avalanche finalizers below.
    const prime_mx1 = 0x165667919E3779F9;
    const prime_mx2 = 0x9FB21C651E98DF25;

    /// Final bit-mixing ("avalanche") step, selected by `mode`:
    /// - `.h3`: XXH3's standard xor-shift/multiply finalizer.
    /// - `.h64`: XXH64-style double xor-shift/multiply finalizer.
    /// - `.rrmxmx`: rotate-rotate-multiply finalizer that also folds in the
    ///   input length (the union payload); used by the 4-8 byte path.
    inline fn avalanche(mode: union(enum) { h3, h64, rrmxmx: u64 }, x0: u64) u64 {
        switch (mode) {
            .h3 => {
                const x1 = (x0 ^ (x0 >> 37)) *% prime_mx1;
                return x1 ^ (x1 >> 32);
            },
            .h64 => {
                const x1 = (x0 ^ (x0 >> 33)) *% XxHash64.prime_2;
                const x2 = (x1 ^ (x1 >> 29)) *% XxHash64.prime_3;
                return x2 ^ (x2 >> 32);
            },
            .rrmxmx => |len| {
                const x1 = (x0 ^ rotl(u64, x0, 49) ^ rotl(u64, x0, 24)) *% prime_mx2;
                const x2 = (x1 ^ ((x1 >> 35) +% len)) *% prime_mx2;
                return x2 ^ (x2 >> 28);
            },
        }
    }

    /// 64x64 -> 128-bit widening multiply, then XOR the low and high halves
    /// together (XXH3's `mul128_fold64`).
    inline fn fold(a: u64, b: u64) u64 {
        const wide: [2]u64 = @bitCast(@as(u128, a) *% b);
        return wide[0] ^ wide[1];
    }

    /// Byte-swap on big-endian targets so that raw loads behave like
    /// little-endian reads (XXH3 is specified in little-endian terms).
    inline fn swap(x: anytype) @TypeOf(x) {
        return if (native_endian == .big) @byteSwap(x) else x;
    }

    /// Empty volatile asm that "consumes" `x`, acting as an optimizer barrier
    /// to keep the scalar mix loops from being auto-vectorized. Skipped at
    /// comptime, where inline asm is unavailable.
    inline fn disableAutoVectorization(x: anytype) void {
        if (!@inComptime()) asm volatile (""
            :
            : [x] "r" (x),
        );
    }

    /// Mix 16 bytes of input against 16 bytes of secret and the seed, folding
    /// the two seed-perturbed 64-bit words into one (XXH3's `mix16B`).
    inline fn mix16(seed: u64, input: []const u8, secret: []const u8) u64 {
        // blk[0..1] = input words, blk[2..3] = secret words (raw byte order).
        const blk: [4]u64 = @bitCast([_][16]u8{ input[0..16].*, secret[0..16].* });
        disableAutoVectorization(seed);
        return fold(
            swap(blk[0]) ^ (swap(blk[2]) +% seed),
            swap(blk[1]) ^ (swap(blk[3]) -% seed),
        );
    }

    /// Long-input (> 240 bytes) hashing state. `extern` so the field layout is
    /// defined and matches declaration order.
    const Accumulator = extern struct {
        /// Number of 64-byte blocks accumulated since the last scramble
        /// (bounded by 1024 / @sizeOf(Block) in `consume`).
        consumed: usize = 0,
        /// The caller-provided seed; also reused by the streaming `final` for
        /// its short-input fallback.
        seed: u64,
        /// Seed-mixed copy of `default_secret`, filled by `init`.
        secret: [192]u8 = undefined,
        /// The eight accumulator lanes, initialized to the reference XXH3
        /// starting values (a fixed arrangement of the XXH32/XXH64 primes).
        state: Block = Block{
            XxHash32.prime_3,
            XxHash64.prime_1,
            XxHash64.prime_2,
            XxHash64.prime_3,
            XxHash64.prime_4,
            XxHash32.prime_2,
            XxHash64.prime_5,
            XxHash32.prime_1,
        },

        /// Derive the custom secret: each pair of little-endian u64 words in
        /// `default_secret` gets `+% seed` / `-% seed` applied alternately
        /// (XXH3's `initCustomSecret`).
        inline fn init(seed: u64) Accumulator {
            var self = Accumulator{ .seed = seed };
            for (
                std.mem.bytesAsSlice(Block, &self.secret),
                std.mem.bytesAsSlice(Block, &default_secret),
            ) |*dst, src| {
                dst.* = swap(swap(src) +% Block{
                    seed, @as(u64, 0) -% seed,
                    seed, @as(u64, 0) -% seed,
                    seed, @as(u64, 0) -% seed,
                    seed, @as(u64, 0) -% seed,
                });
            }
            return self;
        }

        /// Accumulate one 64-byte stripe: per lane, add the 32x32 product of
        /// the (data XOR secret) halves, plus the neighbor-swapped data lanes
        /// (XXH3's `accumulate_512`).
        inline fn round(
            noalias state: *Block,
            noalias input_block: *align(1) const Block,
            noalias secret_block: *align(1) const Block,
        ) void {
            const data = swap(input_block.*);
            const mixed = data ^ swap(secret_block.*);
            // low 32 bits * high 32 bits of each mixed lane.
            state.* +%= (mixed & @as(Block, @splat(0xffffffff))) *% (mixed >> @splat(32));
            // Add each data lane to its pairwise-swapped neighbor's lane.
            state.* +%= @shuffle(u64, data, undefined, [_]i32{ 1, 0, 3, 2, 5, 4, 7, 6 });
        }

        /// Run `round` over consecutive input blocks. The secret window slides
        /// by 8 bytes per block: `secret` is a u64 slice starting at
        /// `consumed * 8`, and each u64 pointer is reinterpreted as the start
        /// of a full 64-byte secret block.
        fn accumulate(noalias self: *Accumulator, blocks: []align(1) const Block) void {
            const secret = std.mem.bytesAsSlice(u64, self.secret[self.consumed * 8 ..]);
            for (blocks, secret[0..blocks.len]) |*input_block, *secret_block| {
                // Prefetch 320 bytes (5 blocks) ahead of the current input.
                @prefetch(@as([*]const u8, @ptrCast(input_block)) + 320, .{});
                round(&self.state, input_block, @ptrCast(secret_block));
            }
        }

        /// Re-randomize the lanes with the last 64 bytes of the secret
        /// (XXH3's `scrambleAcc`); run after every 1024 bytes of input.
        fn scramble(self: *Accumulator) void {
            const secret_block: Block = @bitCast(self.secret[192 - @sizeOf(Block) .. 192].*);
            self.state ^= self.state >> @splat(47);
            self.state ^= swap(secret_block);
            self.state *%= @as(Block, @splat(XxHash32.prime_1));
        }

        /// Feed blocks through `accumulate`, scrambling each time a full
        /// 1024-byte stretch (16 blocks) has been consumed since the last
        /// scramble, and tracking the leftover count in `self.consumed`.
        fn consume(noalias self: *Accumulator, input_blocks: []align(1) const Block) void {
            const blocks_per_scramble = 1024 / @sizeOf(Block);
            std.debug.assert(self.consumed <= blocks_per_scramble);
            var blocks = input_blocks;
            var blocks_until_scramble = blocks_per_scramble - self.consumed;
            while (blocks.len >= blocks_until_scramble) {
                self.accumulate(blocks[0..blocks_until_scramble]);
                self.scramble();
                self.consumed = 0;
                blocks = blocks[blocks_until_scramble..];
                blocks_until_scramble = blocks_per_scramble;
            }
            self.accumulate(blocks);
            self.consumed += blocks.len;
        }

        /// Finish: accumulate the (possibly overlapping) last stripe against
        /// the secret at offset 192 - 64 - 7, XOR-merge the lanes with the
        /// secret at offset 11, fold lane pairs into a u64, and avalanche.
        fn digest(noalias self: *Accumulator, total_len: u64, noalias last_block: *align(1) const Block) u64 {
            const secret_block = self.secret[192 - @sizeOf(Block) - 7 ..][0..@sizeOf(Block)];
            round(&self.state, last_block, @ptrCast(secret_block));
            const merge_block: Block = @bitCast(self.secret[11 .. 11 + @sizeOf(Block)].*);
            self.state ^= swap(merge_block);
            var result = XxHash64.prime_1 *% total_len;
            inline for (0..4) |i| {
                result +%= fold(self.state[i * 2], self.state[i * 2 + 1]);
            }
            return avalanche(.h3, result);
        }
    };

    // Public API - Oneshot

    /// Hash `input` in one call, dispatching to a size-specialized routine.
    /// The zero-length case falls through to an avalanche of the seed mixed
    /// with secret bytes 56..72.
    pub fn hash(seed: u64, input: anytype) u64 {
        const secret = &default_secret;
        if (input.len > 240) return hashLong(seed, input);
        if (input.len > 128) return hash240(seed, input, secret);
        if (input.len > 16) return hash128(seed, input, secret);
        if (input.len > 8) return hash16(seed, input, secret);
        if (input.len > 3) return hash8(seed, input, secret);
        if (input.len > 0) return hash3(seed, input, secret);
        const flip: [2]u64 = @bitCast(secret[56..72].*);
        const key = swap(flip[0]) ^ swap(flip[1]);
        return avalanche(.h64, seed ^ key);
    }

    /// 1-3 byte inputs: pack the first, middle, and last bytes plus the length
    /// into a u32 and avalanche against secret bytes 0..8.
    fn hash3(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len > 0 and input.len < 4);
        const flip: [2]u32 = @bitCast(secret[0..8].*);
        const blk: u32 = @bitCast([_]u8{
            input[input.len - 1],
            @truncate(input.len),
            input[0],
            input[input.len / 2],
        });
        const key = @as(u64, swap(flip[0]) ^ swap(flip[1])) +% seed;
        return avalanche(.h64, key ^ swap(blk));
    }

    /// 4-8 byte inputs: combine the first and last 4 bytes (overlapping for
    /// short lengths) with secret bytes 8..24 and a byte-swap-mixed seed, then
    /// apply the length-aware rrmxmx avalanche.
    fn hash8(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        // NOTE(review): every sibling size path uses @branchHint(.unlikely);
        // confirm .cold here is intentional rather than a stray edit.
        @branchHint(.cold);
        std.debug.assert(input.len >= 4 and input.len <= 8);
        const flip: [2]u64 = @bitCast(secret[8..24].*);
        const blk: [2]u32 = @bitCast([_][4]u8{
            input[0..4].*,
            input[input.len - 4 ..][0..4].*,
        });
        const mixed = seed ^ (@as(u64, @byteSwap(@as(u32, @truncate(seed)))) << 32);
        const key = (swap(flip[0]) ^ swap(flip[1])) -% mixed;
        const combined = (@as(u64, swap(blk[0])) << 32) +% swap(blk[1]);
        return avalanche(.{ .rrmxmx = input.len }, key ^ combined);
    }

    /// 9-16 byte inputs: mix the first and last 8 bytes (overlapping) with
    /// secret bytes 24..56, fold, and avalanche.
    fn hash16(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len > 8 and input.len <= 16);
        const flip: [4]u64 = @bitCast(secret[24..56].*);
        const blk: [2]u64 = @bitCast([_][8]u8{
            input[0..8].*,
            input[input.len - 8 ..][0..8].*,
        });
        const lo = swap(blk[0]) ^ ((swap(flip[0]) ^ swap(flip[1])) +% seed);
        const hi = swap(blk[1]) ^ ((swap(flip[2]) ^ swap(flip[3])) -% seed);
        const combined = @as(u64, input.len) +% @byteSwap(lo) +% hi +% fold(lo, hi);
        return avalanche(.h3, combined);
    }

    /// 17-128 byte inputs: mix up to four symmetric 16-byte pairs working
    /// outward-in (offsets 48/32/16/0 from each end), gated by length.
    fn hash128(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len > 16 and input.len <= 128);
        var acc = XxHash64.prime_1 *% @as(u64, input.len);
        inline for (0..4) |i| {
            const in_offset = 48 - (i * 16);
            const scrt_offset = 96 - (i * 32);
            if (input.len > scrt_offset) {
                acc +%= mix16(seed, input[in_offset..], secret[scrt_offset..]);
                acc +%= mix16(seed, input[input.len - (in_offset + 16) ..], secret[scrt_offset + 16 ..]);
            }
        }
        return avalanche(.h3, acc);
    }

    /// 129-240 byte inputs: mix the first eight 16-byte chunks, then the
    /// remaining chunks against the secret offset by 3 (the reference
    /// implementation's midsize start offset), plus the final 16 bytes
    /// against secret offset 136 - 17.
    fn hash240(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len > 128 and input.len <= 240);
        var acc = XxHash64.prime_1 *% @as(u64, input.len);
        inline for (0..8) |i| {
            acc +%= mix16(seed, input[i * 16 ..], secret[i * 16 ..]);
        }
        var acc_end = mix16(seed, input[input.len - 16 ..], secret[136 - 17 ..]);
        for (8..(input.len / 16)) |i| {
            acc_end +%= mix16(seed, input[i * 16 ..], secret[((i - 8) * 16) + 3 ..]);
            disableAutoVectorization(i);
        }
        acc = avalanche(.h3, acc) +% acc_end;
        return avalanche(.h3, acc);
    }

    /// Inputs > 240 bytes: stream whole 64-byte blocks through an
    /// `Accumulator`, then digest with the final 64 bytes as the last stripe.
    /// `block_count` is a byte count rounded down to a block multiple; the
    /// `- 1` guarantees the last block is left for `digest` even when the
    /// length divides evenly.
    noinline fn hashLong(seed: u64, input: []const u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len >= 240);
        const block_count = ((input.len - 1) / @sizeOf(Block)) * @sizeOf(Block);
        const last_block = input[input.len - @sizeOf(Block) ..][0..@sizeOf(Block)];
        var acc = Accumulator.init(seed);
        acc.consume(std.mem.bytesAsSlice(Block, input[0..block_count]));
        return acc.digest(input.len, @ptrCast(last_block));
    }

    // Public API - Streaming

    /// Bytes currently held in `buffer` (always <= buffer.len).
    buffered: usize = 0,
    /// Staging buffer: 256 bytes = four Blocks, and large enough to hold any
    /// input that the small one-shot paths (<= 240 bytes) can serve in final().
    buffer: [256]u8 = undefined,
    /// Total bytes passed to update() so far; selects the final() strategy.
    total_len: usize = 0,
    /// Long-input hashing state (also carries the seed).
    accumulator: Accumulator,

    /// Begin a streaming hash with the given seed.
    pub fn init(seed: u64) XxHash3 {
        return .{ .accumulator = Accumulator.init(seed) };
    }

    /// Feed more input. Small inputs are staged in `buffer`; once it would
    /// overflow, full blocks are consumed by the accumulator and only the
    /// tail is kept buffered.
    pub fn update(self: *XxHash3, input: anytype) void {
        self.total_len += input.len;
        std.debug.assert(self.buffered <= self.buffer.len);

        // Copy the input into the buffer if we haven't filled it up yet.
        const remaining = self.buffer.len - self.buffered;
        if (input.len <= remaining) {
            @memcpy(self.buffer[self.buffered..][0..input.len], input);
            self.buffered += input.len;
            return;
        }

        // Input will overflow the buffer. Fill up the buffer with some input and consume it.
        var consumable: []const u8 = input;
        if (self.buffered > 0) {
            @memcpy(self.buffer[self.buffered..], consumable[0..remaining]);
            consumable = consumable[remaining..];
            // Buffer is exactly four Blocks, so it can be consumed whole.
            self.accumulator.consume(std.mem.bytesAsSlice(Block, &self.buffer));
            self.buffered = 0;
        }

        // The input isn't small enough to fit in the buffer. Consume it directly.
        if (consumable.len > self.buffer.len) {
            // Same rounding as hashLong: keep at least the final Block's bytes
            // out of the accumulator so digest() gets a proper last stripe.
            const block_count = ((consumable.len - 1) / @sizeOf(Block)) * @sizeOf(Block);
            self.accumulator.consume(std.mem.bytesAsSlice(Block, consumable[0..block_count]));
            consumable = consumable[block_count..];

            // In case we consume all remaining input, write the last block to end of the buffer
            // to populate the last_block_copy in final() similar to hashLong()'s last_block.
            @memcpy(
                self.buffer[self.buffer.len - @sizeOf(Block) .. self.buffer.len],
                (consumable.ptr - @sizeOf(Block))[0..@sizeOf(Block)],
            );
        }

        // Copy in any remaining input into the buffer.
        std.debug.assert(consumable.len <= self.buffer.len);
        @memcpy(self.buffer[0..consumable.len], consumable);
        self.buffered = consumable.len;
    }

    /// Produce the digest. Non-destructive: works on copies of the state, so
    /// `self` may continue to receive update() calls afterward.
    pub fn final(self: *XxHash3) u64 {
        std.debug.assert(self.buffered <= self.total_len);
        std.debug.assert(self.buffered <= self.buffer.len);

        // Use Oneshot hashing for smaller sizes as it doesn't use Accumulator like hashLong.
        // (A total_len <= 240 never flushed the 256-byte buffer, so the whole
        // input is still sitting in it.)
        if (self.total_len <= 240) {
            return hash(self.accumulator.seed, self.buffer[0..self.total_len]);
        }

        // Make a copy of the Accumulator state in case `self` needs to update() / be used later.
        var accumulator_copy = self.accumulator;
        var last_block_copy: [@sizeOf(Block)]u8 = undefined;

        // Digest the last block on the Accumulator copy.
        return accumulator_copy.digest(self.total_len, last_block: {
            if (self.buffered >= @sizeOf(Block)) {
                // Consume all full buffered blocks except (at least) the last,
                // which becomes the digest's final stripe.
                const block_count = ((self.buffered - 1) / @sizeOf(Block)) * @sizeOf(Block);
                accumulator_copy.consume(std.mem.bytesAsSlice(Block, self.buffer[0..block_count]));
                break :last_block @ptrCast(self.buffer[self.buffered - @sizeOf(Block) ..][0..@sizeOf(Block)]);
            } else {
                // Fewer than 64 bytes buffered: stitch the final stripe from
                // the tail of the previous block (saved at the end of the
                // buffer by update()) followed by the buffered bytes.
                const remaining = @sizeOf(Block) - self.buffered;
                @memcpy(last_block_copy[0..remaining], self.buffer[self.buffer.len - remaining ..][0..remaining]);
                @memcpy(last_block_copy[remaining..][0..self.buffered], self.buffer[0..self.buffered]);
                break :last_block @ptrCast(&last_block_copy);
            }
        });
    }
}