struct XxHash3 [src]

Alias for std.hash.xxhash.XxHash3

Fields

buffered: usize = 0
buffer: [256]u8 = undefined
total_len: usize = 0
accumulator: Accumulator

Members

Source

/// XxHash3 hasher producing a `u64` digest.
///
/// Two entry points are offered:
///   * one-shot: `hash(seed, input)`
///   * streaming: `init(seed)`, then any number of `update(input)` calls, then `final()`
///
/// Inputs of at most 240 bytes are handled by dedicated short-input routines; anything
/// longer is processed 64 bytes at a time through `Accumulator`.
pub const XxHash3 = struct {
    /// Eight `u64` lanes (64 bytes). Long inputs are consumed in units of this size.
    const Block = @Vector(8, u64);

    /// 192-byte secret. The short-input routines read it directly; `Accumulator.init`
    /// derives a seed-dependent copy of it for long inputs.
    const default_secret: [192]u8 = .{
        0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
        0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
        0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
        0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
        0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
        0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
        0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
        0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
        0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
        0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
        0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
        0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
    };

    /// Multiplier used by the `.h3` avalanche mode.
    const prime_mx1 = 0x165667919E3779F9;
    /// Multiplier used (twice) by the `.rrmxmx` avalanche mode.
    const prime_mx2 = 0x9FB21C651E98DF25;

    /// Final bit-mixing step applied to `x0`.
    ///
    /// `mode` selects the xor-shift / wrapping-multiply sequence:
    ///   * `.h3`     - one xor-shift-multiply round with `prime_mx1`, then a final xor-shift.
    ///   * `.h64`    - two rounds using `XxHash64.prime_2` and `XxHash64.prime_3`.
    ///   * `.rrmxmx` - xors in two rotations of `x0`, multiplies by `prime_mx2` twice, and
    ///                 adds the carried payload (`hash8` passes the input length) in between.
    inline fn avalanche(mode: union(enum) { h3, h64, rrmxmx: u64 }, x0: u64) u64 {
        switch (mode) {
            .h3 => {
                const x1 = (x0 ^ (x0 >> 37)) *% prime_mx1;
                return x1 ^ (x1 >> 32);
            },
            .h64 => {
                const x1 = (x0 ^ (x0 >> 33)) *% XxHash64.prime_2;
                const x2 = (x1 ^ (x1 >> 29)) *% XxHash64.prime_3;
                return x2 ^ (x2 >> 32);
            },
            .rrmxmx => |len| {
                const x1 = (x0 ^ rotl(u64, x0, 49) ^ rotl(u64, x0, 24)) *% prime_mx2;
                const x2 = (x1 ^ ((x1 >> 35) +% len)) *% prime_mx2;
                return x2 ^ (x2 >> 28);
            },
        }
    }

    /// Multiplies `a` by `b` as a 128-bit product and returns the xor of its two 64-bit halves.
    inline fn fold(a: u64, b: u64) u64 {
        const wide: [2]u64 = @bitCast(@as(u128, a) *% b);
        return wide[0] ^ wide[1];
    }

    /// Byte-swaps `x` when `native_endian` is big-endian and returns it unchanged otherwise,
    /// so values bit-cast from bytes are treated the same way on either kind of target.
    inline fn swap(x: anytype) @TypeOf(x) {
        return if (native_endian == .big) @byteSwap(x) else x;
    }

    /// Feeds `x` into an empty `asm volatile` statement as a register input when not
    /// evaluated at comptime. Emits no instructions of its own; as the name says, it is
    /// meant to discourage the optimizer from auto-vectorizing the surrounding code.
    inline fn disableAutoVectorization(x: anytype) void {
        if (!@inComptime()) asm volatile (""
            :
            : [x] "r" (x),
        );
    }

    /// Combines the first 16 bytes of `input` with the first 16 bytes of `secret` and `seed`,
    /// returning the 64-bit `fold` of the two keyed halves.
    /// Both slices must be at least 16 bytes long.
    inline fn mix16(seed: u64, input: []const u8, secret: []const u8) u64 {
        // blk[0..2] are the two input words, blk[2..4] the two secret words.
        const blk: [4]u64 = @bitCast([_][16]u8{ input[0..16].*, secret[0..16].* });
        disableAutoVectorization(seed);

        return fold(
            swap(blk[0]) ^ (swap(blk[2]) +% seed),
            swap(blk[1]) ^ (swap(blk[3]) -% seed),
        );
    }

    /// Running state for inputs longer than 240 bytes.
    const Accumulator = extern struct {
        /// Number of blocks accumulated since the last `scramble` (or since `init`).
        consumed: usize = 0,
        /// Seed this accumulator was created with; `final` reuses it for short totals.
        seed: u64,
        /// Seed-adjusted copy of `default_secret`, filled in by `init`.
        secret: [192]u8 = undefined,
        /// The eight 64-bit accumulator lanes, starting from fixed prime constants.
        state: Block = Block{
            XxHash32.prime_3,
            XxHash64.prime_1,
            XxHash64.prime_2,
            XxHash64.prime_3,
            XxHash64.prime_4,
            XxHash32.prime_2,
            XxHash64.prime_5,
            XxHash32.prime_1,
        },

        /// Creates an accumulator for `seed`, deriving `secret` from `default_secret` by
        /// adding `seed` to every even 64-bit word and subtracting it from every odd one
        /// (wrapping). The 192-byte secret is walked as three 64-byte blocks.
        inline fn init(seed: u64) Accumulator {
            var self = Accumulator{ .seed = seed };
            for (
                std.mem.bytesAsSlice(Block, &self.secret),
                std.mem.bytesAsSlice(Block, &default_secret),
            ) |*dst, src| {
                // `0 -% seed` is the wrapping negation, so the add below subtracts on odd lanes.
                dst.* = swap(swap(src) +% Block{
                    seed, @as(u64, 0) -% seed,
                    seed, @as(u64, 0) -% seed,
                    seed, @as(u64, 0) -% seed,
                    seed, @as(u64, 0) -% seed,
                });
            }
            return self;
        }

        /// Mixes one 64-byte `input_block` into `state` using one 64-byte `secret_block`.
        inline fn round(
            noalias state: *Block,
            noalias input_block: *align(1) const Block,
            noalias secret_block: *align(1) const Block,
        ) void {
            const data = swap(input_block.*);
            const mixed = data ^ swap(secret_block.*);
            // Per lane: low 32 bits of the keyed data times its high 32 bits.
            state.* +%= (mixed & @as(Block, @splat(0xffffffff))) *% (mixed >> @splat(32));
            // Add the raw input with each adjacent pair of lanes exchanged.
            state.* +%= @shuffle(u64, data, undefined, [_]i32{ 1, 0, 3, 2, 5, 4, 7, 6 });
        }

        /// Runs `round` over `blocks`. The secret window starts `consumed * 8` bytes into
        /// `secret` and slides forward by 8 bytes (one `u64`) for each block.
        fn accumulate(noalias self: *Accumulator, blocks: []align(1) const Block) void {
            const secret = std.mem.bytesAsSlice(u64, self.secret[self.consumed * 8 ..]);
            for (blocks, secret[0..blocks.len]) |*input_block, *secret_block| {
                // Prefetch hint for the address 320 bytes past the start of this block.
                @prefetch(@as([*]const u8, @ptrCast(input_block)) + 320, .{});
                // `secret_block` points at a single u64; reinterpret it as the start of a
                // full 64-byte secret block.
                round(&self.state, input_block, @ptrCast(secret_block));
            }
        }

        /// Scrambles `state` with the last 64 bytes of `secret`:
        /// xor-shift by 47, xor with the secret, multiply by `XxHash32.prime_1`.
        fn scramble(self: *Accumulator) void {
            const secret_block: Block = @bitCast(self.secret[192 - @sizeOf(Block) .. 192].*);
            self.state ^= self.state >> @splat(47);
            self.state ^= swap(secret_block);
            self.state *%= @as(Block, @splat(XxHash32.prime_1));
        }

        /// Accumulates `input_blocks`, calling `scramble` each time 16 blocks
        /// (1024 bytes) have been accumulated since the previous scramble.
        fn consume(noalias self: *Accumulator, input_blocks: []align(1) const Block) void {
            const blocks_per_scramble = 1024 / @sizeOf(Block);
            std.debug.assert(self.consumed <= blocks_per_scramble);

            var blocks = input_blocks;
            // The first chunk only needs to top up whatever was already consumed.
            var blocks_until_scramble = blocks_per_scramble - self.consumed;
            while (blocks.len >= blocks_until_scramble) {
                self.accumulate(blocks[0..blocks_until_scramble]);
                self.scramble();

                self.consumed = 0;
                blocks = blocks[blocks_until_scramble..];
                blocks_until_scramble = blocks_per_scramble;
            }

            // Leftover blocks that do not complete a scramble interval.
            self.accumulate(blocks);
            self.consumed += blocks.len;
        }

        /// Mixes in `last_block` (the final 64 bytes of the input) and reduces `state` to
        /// the 64-bit result. Mutates `state`, so callers that need to keep hashing must
        /// call this on a copy.
        fn digest(noalias self: *Accumulator, total_len: u64, noalias last_block: *align(1) const Block) u64 {
            // The last block uses a dedicated secret window: the 64 bytes ending 7 bytes
            // before the end of `secret`.
            const secret_block = self.secret[192 - @sizeOf(Block) - 7 ..][0..@sizeOf(Block)];
            round(&self.state, last_block, @ptrCast(secret_block));

            // Key the lanes with the 64 secret bytes starting at offset 11 before merging.
            const merge_block: Block = @bitCast(self.secret[11 .. 11 + @sizeOf(Block)].*);
            self.state ^= swap(merge_block);

            // Fold each adjacent pair of lanes and sum the four results with the length term.
            var result = XxHash64.prime_1 *% total_len;
            inline for (0..4) |i| {
                result +%= fold(self.state[i * 2], self.state[i * 2 + 1]);
            }
            return avalanche(.h3, result);
        }
    };

    // Public API - Oneshot

    /// Hashes all of `input` with `seed` in a single call.
    /// Dispatches on `input.len` to the routine specialised for that size range;
    /// the empty input is handled inline.
    pub fn hash(seed: u64, input: anytype) u64 {
        const secret = &default_secret;
        if (input.len > 240) return hashLong(seed, input);
        if (input.len > 128) return hash240(seed, input, secret);
        if (input.len > 16) return hash128(seed, input, secret);
        if (input.len > 8) return hash16(seed, input, secret);
        if (input.len > 3) return hash8(seed, input, secret);
        if (input.len > 0) return hash3(seed, input, secret);

        // Empty input: the result depends only on the seed and secret bytes 56..72.
        const flip: [2]u64 = @bitCast(secret[56..72].*);
        const key = swap(flip[0]) ^ swap(flip[1]);
        return avalanche(.h64, seed ^ key);
    }

    /// Hashes inputs of 1 to 3 bytes.
    fn hash3(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len > 0 and input.len < 4);

        const flip: [2]u32 = @bitCast(secret[0..8].*);
        // Pack last byte, length, first byte and middle byte into one 32-bit word.
        // For short inputs some of these indices refer to the same byte.
        const blk: u32 = @bitCast([_]u8{
            input[input.len - 1],
            @truncate(input.len),
            input[0],
            input[input.len / 2],
        });

        const key = @as(u64, swap(flip[0]) ^ swap(flip[1])) +% seed;
        return avalanche(.h64, key ^ swap(blk));
    }

    /// Hashes inputs of 4 to 8 bytes.
    fn hash8(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.cold);
        std.debug.assert(input.len >= 4 and input.len <= 8);

        const flip: [2]u64 = @bitCast(secret[8..24].*);
        // First four and last four bytes; the two reads overlap when len < 8.
        const blk: [2]u32 = @bitCast([_][4]u8{
            input[0..4].*,
            input[input.len - 4 ..][0..4].*,
        });

        // Xor the byte-swapped low half of the seed into its high half.
        const mixed = seed ^ (@as(u64, @byteSwap(@as(u32, @truncate(seed)))) << 32);
        const key = (swap(flip[0]) ^ swap(flip[1])) -% mixed;
        const combined = (@as(u64, swap(blk[0])) << 32) +% swap(blk[1]);
        return avalanche(.{ .rrmxmx = input.len }, key ^ combined);
    }

    /// Hashes inputs of 9 to 16 bytes.
    fn hash16(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len > 8 and input.len <= 16);

        const flip: [4]u64 = @bitCast(secret[24..56].*);
        // First eight and last eight bytes; the two reads overlap when len < 16.
        const blk: [2]u64 = @bitCast([_][8]u8{
            input[0..8].*,
            input[input.len - 8 ..][0..8].*,
        });

        const lo = swap(blk[0]) ^ ((swap(flip[0]) ^ swap(flip[1])) +% seed);
        const hi = swap(blk[1]) ^ ((swap(flip[2]) ^ swap(flip[3])) -% seed);
        const combined = @as(u64, input.len) +% @byteSwap(lo) +% hi +% fold(lo, hi);
        return avalanche(.h3, combined);
    }

    /// Hashes inputs of 17 to 128 bytes by mixing 16-byte chunks taken symmetrically
    /// from the front and the back of the input.
    fn hash128(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len > 16 and input.len <= 128);

        var acc = XxHash64.prime_1 *% @as(u64, input.len);
        // Unrolled over four front/back chunk pairs, outermost first:
        // (in_offset, scrt_offset) = (48, 96), (32, 64), (16, 32), (0, 0).
        // A pair is mixed only when the input is longer than `scrt_offset` bytes.
        inline for (0..4) |i| {
            const in_offset = 48 - (i * 16);
            const scrt_offset = 96 - (i * 32);
            if (input.len > scrt_offset) {
                acc +%= mix16(seed, input[in_offset..], secret[scrt_offset..]);
                acc +%= mix16(seed, input[input.len - (in_offset + 16) ..], secret[scrt_offset + 16 ..]);
            }
        }
        return avalanche(.h3, acc);
    }

    /// Hashes inputs of 129 to 240 bytes.
    fn hash240(seed: u64, input: anytype, noalias secret: *const [192]u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len > 128 and input.len <= 240);

        // The first 128 bytes: eight 16-byte chunks against matching secret offsets.
        var acc = XxHash64.prime_1 *% @as(u64, input.len);
        inline for (0..8) |i| {
            acc +%= mix16(seed, input[i * 16 ..], secret[i * 16 ..]);
        }

        // The final 16 bytes of the input, keyed from secret offset 119.
        var acc_end = mix16(seed, input[input.len - 16 ..], secret[136 - 17 ..]);
        // Remaining whole 16-byte chunks past the first 128 bytes; their secret
        // offsets restart near the beginning, shifted by 3 bytes.
        for (8..(input.len / 16)) |i| {
            acc_end +%= mix16(seed, input[i * 16 ..], secret[((i - 8) * 16) + 3 ..]);
            disableAutoVectorization(i);
        }

        acc = avalanche(.h3, acc) +% acc_end;
        return avalanche(.h3, acc);
    }

    /// Hashes long inputs through an `Accumulator`. `hash` only routes inputs longer
    /// than 240 bytes here.
    noinline fn hashLong(seed: u64, input: []const u8) u64 {
        @branchHint(.unlikely);
        std.debug.assert(input.len >= 240);

        // Despite its name this is a byte count: the largest multiple of the block size
        // that is strictly less than `input.len`, so at least one byte is always left
        // for the final block handled by `digest`.
        const block_count = ((input.len - 1) / @sizeOf(Block)) * @sizeOf(Block);
        // The final block is always the last 64 bytes and may overlap consumed blocks.
        const last_block = input[input.len - @sizeOf(Block) ..][0..@sizeOf(Block)];

        var acc = Accumulator.init(seed);
        acc.consume(std.mem.bytesAsSlice(Block, input[0..block_count]));
        return acc.digest(input.len, @ptrCast(last_block));
    }

    // Public API - Streaming

    /// Number of valid bytes at the start of `buffer`.
    buffered: usize = 0,
    /// Staging area for input not yet handed to `accumulator`. After input has been
    /// consumed, its last 64 bytes also hold the most recently consumed block (see `update`).
    buffer: [256]u8 = undefined,
    /// Total number of bytes passed to `update` so far.
    total_len: usize = 0,
    /// Long-input state; also carries the seed.
    accumulator: Accumulator,

    /// Creates a streaming hasher for `seed`.
    pub fn init(seed: u64) XxHash3 {
        return .{ .accumulator = Accumulator.init(seed) };
    }

    /// Appends `input` to the data being hashed.
    pub fn update(self: *XxHash3, input: anytype) void {
        self.total_len += input.len;
        std.debug.assert(self.buffered <= self.buffer.len);

        // Copy the input into the buffer if we haven't filled it up yet.
        const remaining = self.buffer.len - self.buffered;
        if (input.len <= remaining) {
            @memcpy(self.buffer[self.buffered..][0..input.len], input);
            self.buffered += input.len;
            return;
        }

        // Input will overflow the buffer. Fill up the buffer with some input and consume it.
        // At least one input byte is left over after the fill, so the consumed buffer never
        // contains the very last block of the stream.
        var consumable: []const u8 = input;
        if (self.buffered > 0) {
            @memcpy(self.buffer[self.buffered..], consumable[0..remaining]);
            consumable = consumable[remaining..];

            self.accumulator.consume(std.mem.bytesAsSlice(Block, &self.buffer));
            self.buffered = 0;
        }

        // The input isn't small enough to fit in the buffer. Consume it directly.
        if (consumable.len > self.buffer.len) {
            // Byte count of whole blocks to consume, always leaving 1..64 bytes behind.
            const block_count = ((consumable.len - 1) / @sizeOf(Block)) * @sizeOf(Block);
            self.accumulator.consume(std.mem.bytesAsSlice(Block, consumable[0..block_count]));
            consumable = consumable[block_count..];

            // In case we consume all remaining input, write the last block to end of the buffer
            // to populate the last_block_copy in final() similar to hashLong()'s last_block.
            // The source is the 64 bytes immediately preceding what is left of `consumable`.
            @memcpy(
                self.buffer[self.buffer.len - @sizeOf(Block) .. self.buffer.len],
                (consumable.ptr - @sizeOf(Block))[0..@sizeOf(Block)],
            );
        }

        // Copy in any remaining input into the buffer.
        std.debug.assert(consumable.len <= self.buffer.len);
        @memcpy(self.buffer[0..consumable.len], consumable);
        self.buffered = consumable.len;
    }

    /// Returns the hash of everything passed to `update` so far.
    /// Works on a copy of the accumulator, so `self.accumulator` is left untouched.
    pub fn final(self: *XxHash3) u64 {
        std.debug.assert(self.buffered <= self.total_len);
        std.debug.assert(self.buffered <= self.buffer.len);

        // Use Oneshot hashing for smaller sizes as it doesn't use Accumulator like hashLong.
        // With at most 240 bytes in total, nothing has overflowed the 256-byte buffer yet,
        // so the whole input is still sitting at the start of `buffer`.
        if (self.total_len <= 240) {
            return hash(self.accumulator.seed, self.buffer[0..self.total_len]);
        }

        // Make a copy of the Accumulator state in case `self` needs to update() / be used later.
        var accumulator_copy = self.accumulator;
        var last_block_copy: [@sizeOf(Block)]u8 = undefined;

        // Digest the last block on the Accumulator copy.
        // The labeled block below is an argument to digest(), so it runs first: it consumes any
        // buffered whole blocks on the copy and yields a pointer to the final 64 bytes.
        return accumulator_copy.digest(self.total_len, last_block: {
            if (self.buffered >= @sizeOf(Block)) {
                // Consume every buffered block except the one containing the final byte.
                const block_count = ((self.buffered - 1) / @sizeOf(Block)) * @sizeOf(Block);
                accumulator_copy.consume(std.mem.bytesAsSlice(Block, self.buffer[0..block_count]));
                break :last_block @ptrCast(self.buffer[self.buffered - @sizeOf(Block) ..][0..@sizeOf(Block)]);
            } else {
                // Fewer than 64 bytes are buffered: stitch the final block together from the
                // tail of `buffer` (bytes kept from previously consumed input) followed by
                // the buffered bytes.
                const remaining = @sizeOf(Block) - self.buffered;
                @memcpy(last_block_copy[0..remaining], self.buffer[self.buffer.len - remaining ..][0..remaining]);
                @memcpy(last_block_copy[remaining..][0..self.buffered], self.buffer[0..self.buffered]);
                break :last_block @ptrCast(&last_block_copy);
            }
        });
    }
}