struct DecodeState — per-block Zstandard decompression state: the three repeat offsets, the offset/match/literal FSE decoding states and table buffers, and the literal-stream / Huffman-tree state for the current block.

Fields

repeat_offsets: [3]u32
offset: StateData(8)
match: StateData(9)
literal: StateData(9)
offset_fse_buffer: []Table.Fse
match_fse_buffer: []Table.Fse
literal_fse_buffer: []Table.Fse
fse_tables_undefined: bool
literal_stream_reader: readers.ReverseBitReader
literal_stream_index: usize
literal_streams: LiteralsSection.Streams
literal_header: LiteralsSection.Header
huffman_tree: ?LiteralsSection.HuffmanTree
literal_written_count: usize
written_count: usize = 0

Members

Source

pub const DecodeState = struct {
    // The three most recently used offsets, per the Zstandard repeat-offset
    // scheme; initialized by `init` from the spec's starting values.
    repeat_offsets: [3]u32,

    // FSE decoding state for each of the three sequence streams. The integer
    // argument is the maximum accuracy log (state bit width) for that stream.
    offset: StateData(8),
    match: StateData(9),
    literal: StateData(9),

    // Caller-provided backing storage for decoded FSE tables (see `init`).
    offset_fse_buffer: []Table.Fse,
    match_fse_buffer: []Table.Fse,
    literal_fse_buffer: []Table.Fse,

    // True until the first block's FSE tables have been read; used to reject
    // `.repeat` table mode on the first block (`error.RepeatModeFirst`).
    fse_tables_undefined: bool,

    // State for reading the (reversed) Huffman-coded literal bitstreams.
    literal_stream_reader: readers.ReverseBitReader,
    literal_stream_index: usize,
    literal_streams: LiteralsSection.Streams,
    literal_header: LiteralsSection.Header,
    // Huffman tree carried over between blocks; `null` until a compressed
    // literals section provides one (treeless sections reuse it).
    huffman_tree: ?LiteralsSection.HuffmanTree,

    // Number of literal bytes decoded so far in the current block.
    literal_written_count: usize,
    // Total bytes written to the output (literals + matches).
    written_count: usize = 0,

    /// Per-stream FSE decoding state: current state value, decoding table,
    /// and the table's accuracy log. `max_accuracy_log` bounds the state's
    /// bit width.
    fn StateData(comptime max_accuracy_log: comptime_int) type {
        return struct {
            state: State,
            table: Table,
            accuracy_log: u8,

            const State = std.meta.Int(.unsigned, max_accuracy_log);
        };
    }

    /// Create a `DecodeState` using the given buffers as backing storage for
    /// decoded FSE tables. The buffers must outlive the returned state.
    /// Sequence/literal fields are left `undefined` until `prepare` is
    /// called for a block.
    pub fn init(
        literal_fse_buffer: []Table.Fse,
        match_fse_buffer: []Table.Fse,
        offset_fse_buffer: []Table.Fse,
    ) DecodeState {
        return DecodeState{
            .repeat_offsets = .{
                types.compressed_block.start_repeated_offset_1,
                types.compressed_block.start_repeated_offset_2,
                types.compressed_block.start_repeated_offset_3,
            },

            .offset = undefined,
            .match = undefined,
            .literal = undefined,

            .literal_fse_buffer = literal_fse_buffer,
            .match_fse_buffer = match_fse_buffer,
            .offset_fse_buffer = offset_fse_buffer,

            .fse_tables_undefined = true,

            .literal_written_count = 0,
            .literal_header = undefined,
            .literal_streams = undefined,
            .literal_stream_reader = undefined,
            .literal_stream_index = undefined,
            .huffman_tree = null,

            .written_count = 0,
        };
    }

    /// Prepare the decoder to decode a compressed block. Loads the literals
    /// stream and Huffman tree from `literals` and reads the FSE tables from
    /// `source`.
    ///
    /// Errors returned:
    ///   - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
    ///     first byte does not have any bits set
    ///   - `error.TreelessLiteralsFirst` `literals` is a treeless literals
    ///     section and the decode state does not have a Huffman tree from a
    ///     previous block
    ///   - `error.RepeatModeFirst` on the first call if one of the sequence FSE
    ///     tables is set to repeat mode
    ///   - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy
    ///   - `error.MalformedFseTable` if there are errors decoding an FSE table
    ///   - `error.EndOfStream` if `source` ends before all FSE tables are read
    pub fn prepare(
        self: *DecodeState,
        source: anytype,
        literals: LiteralsSection,
        sequences_header: SequencesSection.Header,
    ) !void {
        self.literal_written_count = 0;
        self.literal_header = literals.header;
        self.literal_streams = literals.streams;

        if (literals.huffman_tree) |tree| {
            self.huffman_tree = tree;
        } else if (literals.header.block_type == .treeless and self.huffman_tree == null) {
            return error.TreelessLiteralsFirst;
        }

        switch (literals.header.block_type) {
            .raw, .rle => {},
            .compressed, .treeless => {
                self.literal_stream_index = 0;
                switch (literals.streams) {
                    .one => |slice| try self.initLiteralStream(slice),
                    .four => |streams| try self.initLiteralStream(streams[0]),
                }
            },
        }

        if (sequences_header.sequence_count > 0) {
            try self.updateFseTable(source, .literal, sequences_header.literal_lengths);
            try self.updateFseTable(source, .offset, sequences_header.offsets);
            try self.updateFseTable(source, .match, sequences_header.match_lengths);
            self.fse_tables_undefined = false;
        }
    }

    /// Read initial FSE states for sequence decoding.
    ///
    /// Errors returned:
    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits.
    pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void {
        self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
        self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
        self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log);
    }

    /// Push `offset` onto the repeat-offset history, shifting the older
    /// entries down one slot.
    fn updateRepeatOffset(self: *DecodeState, offset: u32) void {
        self.repeat_offsets[2] = self.repeat_offsets[1];
        self.repeat_offsets[1] = self.repeat_offsets[0];
        self.repeat_offsets[0] = offset;
    }

    /// Select repeat offset `index` and move it to the front of the history,
    /// returning the selected offset.
    fn useRepeatOffset(self: *DecodeState, index: usize) u32 {
        if (index == 1)
            std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1])
        else if (index == 2) {
            std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]);
            std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]);
        }
        return self.repeat_offsets[0];
    }

    const DataType = enum { offset, match, literal };

    /// Advance the FSE state of the chosen sequence stream by reading
    /// `data.bits` bits from `bit_reader` and adding them to the table
    /// entry's baseline.
    ///
    /// Errors returned:
    ///   - `error.MalformedFseBits` if the computed next state does not fit
    ///     in the stream's state integer
    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits
    fn updateState(
        self: *DecodeState,
        comptime choice: DataType,
        bit_reader: *readers.ReverseBitReader,
    ) error{ MalformedFseBits, EndOfStream }!void {
        switch (@field(self, @tagName(choice)).table) {
            // An RLE table has a single fixed symbol; the state never moves.
            .rle => {},
            .fse => |table| {
                const data = table[@field(self, @tagName(choice)).state];
                const T = @TypeOf(@field(self, @tagName(choice))).State;
                const bits_summand = try bit_reader.readBitsNoEof(T, data.bits);
                const next_state = std.math.cast(
                    @TypeOf(@field(self, @tagName(choice))).State,
                    data.baseline + bits_summand,
                ) orelse return error.MalformedFseBits;
                @field(self, @tagName(choice)).state = next_state;
            },
        }
    }

    const FseTableError = error{
        MalformedFseTable,
        MalformedAccuracyLog,
        RepeatModeFirst,
        EndOfStream,
    };

    /// Install the FSE table for the chosen sequence stream according to
    /// `mode`: use the spec's predefined table, read a single RLE byte, decode
    /// a table from `source` into this stream's backing buffer, or keep the
    /// previous block's table (`.repeat`).
    ///
    /// Errors returned: see `FseTableError`.
    fn updateFseTable(
        self: *DecodeState,
        source: anytype,
        comptime choice: DataType,
        mode: SequencesSection.Header.Mode,
    ) !void {
        const field_name = @tagName(choice);
        switch (mode) {
            .predefined => {
                @field(self, field_name).accuracy_log =
                    @field(types.compressed_block.default_accuracy_log, field_name);

                @field(self, field_name).table =
                    @field(types.compressed_block, "predefined_" ++ field_name ++ "_fse_table");
            },
            .rle => {
                @field(self, field_name).accuracy_log = 0;
                @field(self, field_name).table = .{ .rle = try source.readByte() };
            },
            .fse => {
                var bit_reader = readers.bitReader(source);

                const table_size = try decodeFseTable(
                    &bit_reader,
                    @field(types.compressed_block.table_symbol_count_max, field_name),
                    @field(types.compressed_block.table_accuracy_log_max, field_name),
                    @field(self, field_name ++ "_fse_buffer"),
                );
                @field(self, field_name).table = .{
                    .fse = @field(self, field_name ++ "_fse_buffer")[0..table_size],
                };
                @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size);
            },
            // Repeat mode is only valid once a previous block has set tables.
            .repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst,
        }
    }

    const Sequence = struct {
        literal_length: u32,
        match_length: u32,
        offset: u32,
    };

    /// Decode the next sequence's literal length, match length, and offset
    /// from the current FSE states and `bit_reader`, applying the
    /// repeat-offset rules for offset values 1–3.
    ///
    /// Errors returned:
    ///   - `error.InvalidBitStream` if a decoded code is out of range or the
    ///     resulting offset is zero
    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits
    fn nextSequence(
        self: *DecodeState,
        bit_reader: *readers.ReverseBitReader,
    ) error{ InvalidBitStream, EndOfStream }!Sequence {
        const raw_code = self.getCode(.offset);
        const offset_code = std.math.cast(u5, raw_code) orelse {
            return error.InvalidBitStream;
        };
        const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code);

        const match_code = self.getCode(.match);
        if (match_code >= types.compressed_block.match_length_code_table.len)
            return error.InvalidBitStream;
        const match = types.compressed_block.match_length_code_table[match_code];
        const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]);

        const literal_code = self.getCode(.literal);
        if (literal_code >= types.compressed_block.literals_length_code_table.len)
            return error.InvalidBitStream;
        const literal = types.compressed_block.literals_length_code_table[literal_code];
        const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]);

        // Offset values > 3 encode a new offset (value - 3); values 1–3 select
        // from the repeat-offset history, with special cases when
        // literal_length == 0 per the Zstandard format.
        const offset = if (offset_value > 3) offset: {
            const offset = offset_value - 3;
            self.updateRepeatOffset(offset);
            break :offset offset;
        } else offset: {
            if (literal_length == 0) {
                if (offset_value == 3) {
                    const offset = self.repeat_offsets[0] - 1;
                    self.updateRepeatOffset(offset);
                    break :offset offset;
                }
                break :offset self.useRepeatOffset(offset_value);
            }
            break :offset self.useRepeatOffset(offset_value - 1);
        };

        if (offset == 0) return error.InvalidBitStream;
        return .{
            .literal_length = literal_length,
            .match_length = match_length,
            .offset = offset,
        };
    }

    /// Write `sequence`'s literals and then its back-referenced match into
    /// `dest` starting at `write_pos`.
    ///
    /// Errors returned:
    ///   - `error.MalformedSequence` if the offset reaches before the start
    ///     of the output written so far
    ///   - any error of `decodeLiteralsSlice`
    fn executeSequenceSlice(
        self: *DecodeState,
        dest: []u8,
        write_pos: usize,
        sequence: Sequence,
    ) (error{MalformedSequence} || DecodeLiteralsError)!void {
        if (sequence.offset > write_pos + sequence.literal_length) return error.MalformedSequence;

        try self.decodeLiteralsSlice(dest[write_pos..], sequence.literal_length);
        const copy_start = write_pos + sequence.literal_length - sequence.offset;
        // Element-wise forward copy is deliberate: when offset < match_length
        // the source and destination ranges overlap and the match repeats
        // already-copied bytes.
        for (
            dest[write_pos + sequence.literal_length ..][0..sequence.match_length],
            dest[copy_start..][0..sequence.match_length],
        ) |*d, s| d.* = s;
        self.written_count += sequence.match_length;
    }

    /// Ring-buffer variant of `executeSequenceSlice`.
    ///
    /// Errors returned:
    ///   - `error.MalformedSequence` if the offset exceeds the window
    ///     (ring-buffer size) or the output written so far
    ///   - any error of `decodeLiteralsRingBuffer`
    fn executeSequenceRingBuffer(
        self: *DecodeState,
        dest: *RingBuffer,
        sequence: Sequence,
    ) (error{MalformedSequence} || DecodeLiteralsError)!void {
        if (sequence.offset > @min(dest.data.len, self.written_count + sequence.literal_length))
            return error.MalformedSequence;

        try self.decodeLiteralsRingBuffer(dest, sequence.literal_length);
        const copy_start = dest.write_index + dest.data.len - sequence.offset;
        const copy_slice = dest.sliceAt(copy_start, sequence.match_length);
        // Forward copies preserve overlap semantics (see executeSequenceSlice).
        dest.writeSliceForwardsAssumeCapacity(copy_slice.first);
        dest.writeSliceForwardsAssumeCapacity(copy_slice.second);
        self.written_count += sequence.match_length;
    }

    const DecodeSequenceError = error{
        InvalidBitStream,
        EndOfStream,
        MalformedSequence,
        MalformedFseBits,
    } || DecodeLiteralsError;

    /// Decode one sequence from `bit_reader` into `dest`, written starting at
    /// `write_pos` and update FSE states if `last_sequence` is `false`.
    /// `prepare()` must be called for the block before attempting to decode
    /// sequences.
    ///
    /// Errors returned:
    ///   - `error.MalformedSequence` if the decompressed sequence would be
    ///     longer than `sequence_size_limit` or the sequence's offset is too
    ///     large
    ///   - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal
    ///     streams do not contain enough literals for the sequence (this may
    ///     mean the literal stream or the sequence is malformed).
    ///   - `error.InvalidBitStream` if the FSE sequence bitstream is malformed
    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits
    ///   - `error.DestTooSmall` if `dest` is not large enough to hold the
    ///     decompressed sequence
    pub fn decodeSequenceSlice(
        self: *DecodeState,
        dest: []u8,
        write_pos: usize,
        bit_reader: *readers.ReverseBitReader,
        sequence_size_limit: usize,
        last_sequence: bool,
    ) (error{DestTooSmall} || DecodeSequenceError)!usize {
        const sequence = try self.nextSequence(bit_reader);
        const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
        if (sequence_length > sequence_size_limit) return error.MalformedSequence;
        if (sequence_length > dest[write_pos..].len) return error.DestTooSmall;

        try self.executeSequenceSlice(dest, write_pos, sequence);
        // The final sequence of a block is not followed by state-update bits.
        if (!last_sequence) {
            try self.updateState(.literal, bit_reader);
            try self.updateState(.match, bit_reader);
            try self.updateState(.offset, bit_reader);
        }
        return sequence_length;
    }

    /// Decode one sequence from `bit_reader` into `dest`; see
    /// `decodeSequenceSlice`.
    pub fn decodeSequenceRingBuffer(
        self: *DecodeState,
        dest: *RingBuffer,
        bit_reader: anytype,
        sequence_size_limit: usize,
        last_sequence: bool,
    ) DecodeSequenceError!usize {
        const sequence = try self.nextSequence(bit_reader);
        const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
        if (sequence_length > sequence_size_limit) return error.MalformedSequence;

        try self.executeSequenceRingBuffer(dest, sequence);
        if (!last_sequence) {
            try self.updateState(.literal, bit_reader);
            try self.updateState(.match, bit_reader);
            try self.updateState(.offset, bit_reader);
        }
        return sequence_length;
    }

    /// Advance to the next of the four literal streams and initialize the
    /// stream reader on it. Only valid for `.four`-stream literals sections.
    fn nextLiteralMultiStream(
        self: *DecodeState,
    ) error{BitStreamHasNoStartBit}!void {
        self.literal_stream_index += 1;
        try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
    }

    /// Initialize the reversed-bitstream reader on `bytes`.
    fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
        try self.literal_stream_reader.init(bytes);
    }

    /// True when no literal bits remain in any stream.
    fn isLiteralStreamEmpty(self: *DecodeState) bool {
        switch (self.literal_streams) {
            .one => return self.literal_stream_reader.isEmpty(),
            .four => return self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(),
        }
    }

    const LiteralBitsError = error{
        BitStreamHasNoStartBit,
        UnexpectedEndOfLiteralStream,
    };

    /// Read `bit_count_to_read` bits from the current literal stream,
    /// transparently advancing to the next of four streams when the current
    /// one is exhausted.
    ///
    /// Errors returned: see `LiteralBitsError`.
    fn readLiteralsBits(
        self: *DecodeState,
        bit_count_to_read: u16,
    ) LiteralBitsError!u16 {
        return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch bits: {
            if (self.literal_streams == .four and self.literal_stream_index < 3) {
                try self.nextLiteralMultiStream();
                break :bits self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch
                    return error.UnexpectedEndOfLiteralStream;
            } else {
                return error.UnexpectedEndOfLiteralStream;
            }
        };
    }

    const DecodeLiteralsError = error{
        MalformedLiteralsLength,
        NotFound,
    } || LiteralBitsError;

    /// Decode `len` bytes of literals into `dest`.
    ///
    /// Errors returned:
    ///   - `error.MalformedLiteralsLength` if the number of literal bytes
    ///     decoded by `self` plus `len` is greater than the regenerated size of
    ///     `literals`
    ///   - `error.UnexpectedEndOfLiteralStream` and `error.NotFound` if there
    ///     are problems decoding Huffman compressed literals
    pub fn decodeLiteralsSlice(
        self: *DecodeState,
        dest: []u8,
        len: usize,
    ) DecodeLiteralsError!void {
        if (self.literal_written_count + len > self.literal_header.regenerated_size)
            return error.MalformedLiteralsLength;

        switch (self.literal_header.block_type) {
            .raw => {
                const literal_data = self.literal_streams.one[self.literal_written_count..][0..len];
                @memcpy(dest[0..len], literal_data);
                self.literal_written_count += len;
                self.written_count += len;
            },
            .rle => {
                for (0..len) |i| {
                    dest[i] = self.literal_streams.one[0];
                }
                self.literal_written_count += len;
                self.written_count += len;
            },
            .compressed, .treeless => {
                // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4;
                // `prepare` guarantees a Huffman tree exists for these block
                // types (else it returns error.TreelessLiteralsFirst).
                const huffman_tree = self.huffman_tree orelse unreachable;
                const max_bit_count = huffman_tree.max_bit_count;
                const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
                    huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
                    max_bit_count,
                );
                var bits_read: u4 = 0;
                var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
                var bit_count_to_read: u4 = starting_bit_count;
                for (0..len) |i| {
                    var prefix: u16 = 0;
                    // Walk the Huffman tree, accumulating prefix bits until a
                    // symbol is found, then reset for the next literal.
                    while (true) {
                        const new_bits = self.readLiteralsBits(bit_count_to_read) catch |err| {
                            return err;
                        };
                        prefix <<= bit_count_to_read;
                        prefix |= new_bits;
                        bits_read += bit_count_to_read;
                        const result = huffman_tree.query(huffman_tree_index, prefix) catch |err| {
                            return err;
                        };

                        switch (result) {
                            .symbol => |sym| {
                                dest[i] = sym;
                                bit_count_to_read = starting_bit_count;
                                bits_read = 0;
                                huffman_tree_index = huffman_tree.symbol_count_minus_one;
                                break;
                            },
                            .index => |index| {
                                huffman_tree_index = index;
                                const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
                                    huffman_tree.nodes[index].weight,
                                    max_bit_count,
                                );
                                bit_count_to_read = bit_count - bits_read;
                            },
                        }
                    }
                }
                self.literal_written_count += len;
                self.written_count += len;
            },
        }
    }

    /// Decode literals into `dest`; see `decodeLiteralsSlice()`.
    pub fn decodeLiteralsRingBuffer(
        self: *DecodeState,
        dest: *RingBuffer,
        len: usize,
    ) DecodeLiteralsError!void {
        if (self.literal_written_count + len > self.literal_header.regenerated_size)
            return error.MalformedLiteralsLength;

        switch (self.literal_header.block_type) {
            .raw => {
                const literals_end = self.literal_written_count + len;
                const literal_data = self.literal_streams.one[self.literal_written_count..literals_end];
                dest.writeSliceAssumeCapacity(literal_data);
                self.literal_written_count += len;
                self.written_count += len;
            },
            .rle => {
                for (0..len) |_| {
                    dest.writeAssumeCapacity(self.literal_streams.one[0]);
                }
                self.literal_written_count += len;
                self.written_count += len;
            },
            .compressed, .treeless => {
                // const written_bytes_per_stream = (literals.header.regenerated_size + 3) / 4;
                // `prepare` guarantees a Huffman tree exists for these block
                // types (else it returns error.TreelessLiteralsFirst).
                const huffman_tree = self.huffman_tree orelse unreachable;
                const max_bit_count = huffman_tree.max_bit_count;
                const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
                    huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
                    max_bit_count,
                );
                var bits_read: u4 = 0;
                var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
                var bit_count_to_read: u4 = starting_bit_count;
                for (0..len) |_| {
                    var prefix: u16 = 0;
                    while (true) {
                        const new_bits = try self.readLiteralsBits(bit_count_to_read);
                        prefix <<= bit_count_to_read;
                        prefix |= new_bits;
                        bits_read += bit_count_to_read;
                        const result = try huffman_tree.query(huffman_tree_index, prefix);

                        switch (result) {
                            .symbol => |sym| {
                                dest.writeAssumeCapacity(sym);
                                bit_count_to_read = starting_bit_count;
                                bits_read = 0;
                                huffman_tree_index = huffman_tree.symbol_count_minus_one;
                                break;
                            },
                            .index => |index| {
                                huffman_tree_index = index;
                                const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
                                    huffman_tree.nodes[index].weight,
                                    max_bit_count,
                                );
                                bit_count_to_read = bit_count - bits_read;
                            },
                        }
                    }
                }
                self.literal_written_count += len;
                self.written_count += len;
            },
        }
    }

    /// Return the current symbol code for the chosen sequence stream: the
    /// fixed byte for an RLE table, or the current state's table symbol for
    /// an FSE table.
    fn getCode(self: *DecodeState, comptime choice: DataType) u32 {
        return switch (@field(self, @tagName(choice)).table) {
            .rle => |value| value,
            .fse => |table| table[@field(self, @tagName(choice)).state].symbol,
        };
    }
};