struct Decode [src]
Fields
repeat_offsets: [3]u32
offset: StateData(8)
match: StateData(9)
literal: StateData(9)
literal_fse_buffer: [zstd.table_size_max.literal]Table.Fse
match_fse_buffer: [zstd.table_size_max.match]Table.Fse
offset_fse_buffer: [zstd.table_size_max.offset]Table.Fse
fse_tables_undefined: bool
literal_stream_reader: ReverseBitReader
literal_stream_index: usize
literal_streams: LiteralsSection.Streams
literal_header: LiteralsSection.Header
huffman_tree: ?LiteralsSection.HuffmanTree
literal_written_count: usize
Members
- prepare (Function)
- PrepareError (Error Set)
- readInitialFseState (Function)
Source
pub const Decode = struct {
repeat_offsets: [3]u32,
offset: StateData(8),
match: StateData(9),
literal: StateData(9),
literal_fse_buffer: [zstd.table_size_max.literal]Table.Fse,
match_fse_buffer: [zstd.table_size_max.match]Table.Fse,
offset_fse_buffer: [zstd.table_size_max.offset]Table.Fse,
fse_tables_undefined: bool,
literal_stream_reader: ReverseBitReader,
literal_stream_index: usize,
literal_streams: LiteralsSection.Streams,
literal_header: LiteralsSection.Header,
huffman_tree: ?LiteralsSection.HuffmanTree,
literal_written_count: usize,
fn StateData(comptime max_accuracy_log: comptime_int) type {
return struct {
state: @This().State,
table: Table,
accuracy_log: u8,
const State = std.meta.Int(.unsigned, max_accuracy_log);
};
}
const init: Decode = .{
.repeat_offsets = .{
zstd.start_repeated_offset_1,
zstd.start_repeated_offset_2,
zstd.start_repeated_offset_3,
},
.offset = undefined,
.match = undefined,
.literal = undefined,
.literal_fse_buffer = undefined,
.match_fse_buffer = undefined,
.offset_fse_buffer = undefined,
.fse_tables_undefined = true,
.literal_written_count = 0,
.literal_header = undefined,
.literal_streams = undefined,
.literal_stream_reader = undefined,
.literal_stream_index = undefined,
.huffman_tree = null,
};
pub const PrepareError = error{
/// the (reversed) literal bitstream's first byte does not have any bits set
MissingStartBit,
/// `literals` is a treeless literals section and the decode state does not
/// have a Huffman tree from a previous block
TreelessLiteralsFirst,
/// on the first call if one of the sequence FSE tables is set to repeat mode
RepeatModeFirst,
/// an FSE table has an invalid accuracy
MalformedAccuracyLog,
/// failed decoding an FSE table
MalformedFseTable,
/// input stream ends before all FSE tables are read
EndOfStream,
ReadFailed,
InputBufferUndersize,
};
/// Prepare the decoder to decode a compressed block. Loads the
/// literals stream and Huffman tree from `literals` and reads the
/// FSE tables from `in`.
pub fn prepare(
self: *Decode,
in: *Reader,
remaining: *Limit,
literals: LiteralsSection,
sequences_header: SequencesSection.Header,
) PrepareError!void {
self.literal_written_count = 0;
self.literal_header = literals.header;
self.literal_streams = literals.streams;
if (literals.huffman_tree) |tree| {
self.huffman_tree = tree;
} else if (literals.header.block_type == .treeless and self.huffman_tree == null) {
return error.TreelessLiteralsFirst;
}
switch (literals.header.block_type) {
.raw, .rle => {},
.compressed, .treeless => {
self.literal_stream_index = 0;
switch (literals.streams) {
.one => |slice| try self.initLiteralStream(slice),
.four => |streams| try self.initLiteralStream(streams[0]),
}
},
}
if (sequences_header.sequence_count > 0) {
try self.updateFseTable(in, remaining, .literal, sequences_header.literal_lengths);
try self.updateFseTable(in, remaining, .offset, sequences_header.offsets);
try self.updateFseTable(in, remaining, .match, sequences_header.match_lengths);
self.fse_tables_undefined = false;
}
}
/// Read initial FSE states for sequence decoding.
pub fn readInitialFseState(self: *Decode, bit_reader: *ReverseBitReader) error{EndOfStream}!void {
self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log);
}
fn updateRepeatOffset(self: *Decode, offset: u32) void {
self.repeat_offsets[2] = self.repeat_offsets[1];
self.repeat_offsets[1] = self.repeat_offsets[0];
self.repeat_offsets[0] = offset;
}
fn useRepeatOffset(self: *Decode, index: usize) u32 {
if (index == 1)
std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1])
else if (index == 2) {
std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]);
std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]);
}
return self.repeat_offsets[0];
}
const WhichFse = enum { offset, match, literal };
/// TODO: don't use `@field`
fn updateState(
self: *Decode,
comptime choice: WhichFse,
bit_reader: *ReverseBitReader,
) error{ MalformedFseBits, EndOfStream }!void {
switch (@field(self, @tagName(choice)).table) {
.rle => {},
.fse => |table| {
const data = table[@field(self, @tagName(choice)).state];
const T = @TypeOf(@field(self, @tagName(choice))).State;
const bits_summand = try bit_reader.readBitsNoEof(T, data.bits);
const next_state = std.math.cast(
@TypeOf(@field(self, @tagName(choice))).State,
data.baseline + bits_summand,
) orelse return error.MalformedFseBits;
@field(self, @tagName(choice)).state = next_state;
},
}
}
const FseTableError = error{
MalformedFseTable,
MalformedAccuracyLog,
RepeatModeFirst,
EndOfStream,
};
/// TODO: don't use `@field`
fn updateFseTable(
self: *Decode,
in: *Reader,
remaining: *Limit,
comptime choice: WhichFse,
mode: SequencesSection.Header.Mode,
) !void {
const field_name = @tagName(choice);
switch (mode) {
.predefined => {
@field(self, field_name).accuracy_log =
@field(zstd.default_accuracy_log, field_name);
@field(self, field_name).table =
@field(Table, "predefined_" ++ field_name);
},
.rle => {
@field(self, field_name).accuracy_log = 0;
remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
@field(self, field_name).table = .{ .rle = try in.takeByte() };
},
.fse => {
const max_table_size = 2048;
const peek_len: usize = remaining.minInt(max_table_size);
if (in.buffer.len < peek_len) return error.InputBufferUndersize;
const limited_buffer = try in.peek(peek_len);
var bit_reader: BitReader = .{ .bytes = limited_buffer };
const table_size = try Table.decode(
&bit_reader,
@field(zstd.table_symbol_count_max, field_name),
@field(zstd.table_accuracy_log_max, field_name),
&@field(self, field_name ++ "_fse_buffer"),
);
@field(self, field_name).table = .{
.fse = (&@field(self, field_name ++ "_fse_buffer"))[0..table_size],
};
@field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size);
in.toss(bit_reader.index);
remaining.* = remaining.subtract(bit_reader.index).?;
},
.repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst,
}
}
const Sequence = struct {
literal_length: u32,
match_length: u32,
offset: u32,
};
fn nextSequence(
self: *Decode,
bit_reader: *ReverseBitReader,
) error{ InvalidBitStream, EndOfStream }!Sequence {
const raw_code = self.getCode(.offset);
const offset_code = std.math.cast(u5, raw_code) orelse {
return error.InvalidBitStream;
};
const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code);
const match_code = self.getCode(.match);
if (match_code >= zstd.match_length_code_table.len)
return error.InvalidBitStream;
const match = zstd.match_length_code_table[match_code];
const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]);
const literal_code = self.getCode(.literal);
if (literal_code >= zstd.literals_length_code_table.len)
return error.InvalidBitStream;
const literal = zstd.literals_length_code_table[literal_code];
const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]);
const offset = if (offset_value > 3) offset: {
const offset = offset_value - 3;
self.updateRepeatOffset(offset);
break :offset offset;
} else offset: {
if (literal_length == 0) {
if (offset_value == 3) {
const offset = self.repeat_offsets[0] - 1;
self.updateRepeatOffset(offset);
break :offset offset;
}
break :offset self.useRepeatOffset(offset_value);
}
break :offset self.useRepeatOffset(offset_value - 1);
};
if (offset == 0) return error.InvalidBitStream;
return .{
.literal_length = literal_length,
.match_length = match_length,
.offset = offset,
};
}
/// Decode one sequence from `bit_reader` into `dest`. Updates FSE states
/// if `last_sequence` is `false`. Assumes `prepare` called for the block
/// before attempting to decode sequences.
fn decodeSequence(
decode: *Decode,
dest: []u8,
write_pos: usize,
bit_reader: *ReverseBitReader,
) !usize {
const sequence = try decode.nextSequence(bit_reader);
const literal_length: usize = sequence.literal_length;
const match_length: usize = sequence.match_length;
const sequence_length = literal_length + match_length;
if (sequence_length > dest[write_pos..].len)
return error.MalformedSequence;
const copy_start = std.math.sub(usize, write_pos + sequence.literal_length, sequence.offset) catch
return error.MalformedSequence;
if (decode.literal_written_count + literal_length > decode.literal_header.regenerated_size)
return error.MalformedLiteralsLength;
var sub_bw: Writer = .fixed(dest[write_pos..]);
try decodeLiterals(decode, &sub_bw, literal_length);
decode.literal_written_count += literal_length;
// This is not a @memmove; it intentionally repeats patterns
// caused by iterating one byte at a time.
for (
dest[write_pos + literal_length ..][0..match_length],
dest[copy_start..][0..match_length],
) |*d, s| d.* = s;
return sequence_length;
}
fn nextLiteralMultiStream(self: *Decode) error{MissingStartBit}!void {
self.literal_stream_index += 1;
try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
}
fn initLiteralStream(self: *Decode, bytes: []const u8) error{MissingStartBit}!void {
self.literal_stream_reader = try ReverseBitReader.init(bytes);
}
fn isLiteralStreamEmpty(self: *Decode) bool {
switch (self.literal_streams) {
.one => return self.literal_stream_reader.isEmpty(),
.four => return self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(),
}
}
const LiteralBitsError = error{
MissingStartBit,
UnexpectedEndOfLiteralStream,
};
fn readLiteralsBits(
self: *Decode,
bit_count_to_read: u16,
) LiteralBitsError!u16 {
return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch bits: {
if (self.literal_streams == .four and self.literal_stream_index < 3) {
try self.nextLiteralMultiStream();
break :bits self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch
return error.UnexpectedEndOfLiteralStream;
} else {
return error.UnexpectedEndOfLiteralStream;
}
};
}
/// Decode `len` bytes of literals into `w`.
fn decodeLiterals(d: *Decode, w: *Writer, len: usize) !void {
switch (d.literal_header.block_type) {
.raw => {
try w.writeAll(d.literal_streams.one[d.literal_written_count..][0..len]);
},
.rle => {
try w.splatByteAll(d.literal_streams.one[0], len);
},
.compressed, .treeless => {
const buf = try w.writableSlice(len);
const huffman_tree = d.huffman_tree.?;
const max_bit_count = huffman_tree.max_bit_count;
const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
max_bit_count,
);
var bits_read: u4 = 0;
var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
var bit_count_to_read: u4 = starting_bit_count;
for (buf) |*out| {
var prefix: u16 = 0;
while (true) {
const new_bits = try d.readLiteralsBits(bit_count_to_read);
prefix <<= bit_count_to_read;
prefix |= new_bits;
bits_read += bit_count_to_read;
const result = try huffman_tree.query(huffman_tree_index, prefix);
switch (result) {
.symbol => |sym| {
out.* = sym;
bit_count_to_read = starting_bit_count;
bits_read = 0;
huffman_tree_index = huffman_tree.symbol_count_minus_one;
break;
},
.index => |index| {
huffman_tree_index = index;
const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
huffman_tree.nodes[index].weight,
max_bit_count,
);
bit_count_to_read = bit_count - bits_read;
},
}
}
}
},
}
}
/// TODO: don't use `@field`
fn getCode(self: *Decode, comptime choice: WhichFse) u32 {
return switch (@field(self, @tagName(choice)).table) {
.rle => |value| value,
.fse => |table| table[@field(self, @tagName(choice)).state].symbol,
};
}
}