struct LiteralsSection [src]

Fields

header: Header
huffman_tree: ?HuffmanTree
streams: Streams
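
The fields are only meaningful together: `header` describes the block, `huffman_tree` is populated only for compressed blocks (it stays null for raw, RLE, and treeless blocks, per `decode` below), and `streams` carries the literal data itself. A minimal sketch of consuming a decoded value, assuming `section` came from `LiteralsSection.decode` and the struct below is in scope:

// Sketch only; `countLiteralStreams` is a hypothetical helper, not part of the module.
fn countLiteralStreams(section: LiteralsSection) usize {
    return switch (section.streams) {
        .one => 1, // .raw, .rle, and size_format == 0 sections use a single stream
        .four => 4, // larger compressed/treeless sections (see streamCount below)
    };
}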

Members

Streams
Header
BlockType
HuffmanTree
StreamCount
streamCount
DecodeError
decode

Source

pub const LiteralsSection = struct {
    header: Header,
    huffman_tree: ?HuffmanTree,
    streams: Streams,

    pub const Streams = union(enum) {
        one: []const u8,
        four: [4][]const u8,

        fn decode(size_format: u2, stream_data: []const u8) !Streams {
            if (size_format == 0) {
                return .{ .one = stream_data };
            }

            if (stream_data.len < 6) return error.MalformedLiteralsSection;

            const stream_1_length: usize = std.mem.readInt(u16, stream_data[0..2], .little);
            const stream_2_length: usize = std.mem.readInt(u16, stream_data[2..4], .little);
            const stream_3_length: usize = std.mem.readInt(u16, stream_data[4..6], .little);

            const stream_1_start = 6;
            const stream_2_start = stream_1_start + stream_1_length;
            const stream_3_start = stream_2_start + stream_2_length;
            const stream_4_start = stream_3_start + stream_3_length;

            if (stream_data.len < stream_4_start) return error.MalformedLiteralsSection;

            return .{ .four = .{
                stream_data[stream_1_start .. stream_1_start + stream_1_length],
                stream_data[stream_2_start .. stream_2_start + stream_2_length],
                stream_data[stream_3_start .. stream_3_start + stream_3_length],
                stream_data[stream_4_start..],
            } };
        }
    };

    pub const Header = struct {
        block_type: BlockType,
        size_format: u2,
        regenerated_size: u20,
        compressed_size: ?u18,

        /// Decode a literals section header.
        pub fn decode(in: *Reader, remaining: *Limit) !Header {
            remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
            const byte0 = try in.takeByte();

            const block_type: BlockType = @enumFromInt(byte0 & 0b11);
            const size_format: u2 = @intCast((byte0 & 0b1100) >> 2);
            var regenerated_size: u20 = undefined;
            var compressed_size: ?u18 = null;

            switch (block_type) {
                .raw, .rle => {
                    switch (size_format) {
                        0, 2 => {
                            regenerated_size = byte0 >> 3;
                        },
                        1 => {
                            remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
                            regenerated_size = (byte0 >> 4) + (@as(u20, try in.takeByte()) << 4);
                        },
                        3 => {
                            remaining.* = remaining.subtract(2) orelse return error.EndOfStream;
                            regenerated_size = (byte0 >> 4) +
                                (@as(u20, try in.takeByte()) << 4) +
                                (@as(u20, try in.takeByte()) << 12);
                        },
                    }
                },
                .compressed, .treeless => {
                    remaining.* = remaining.subtract(2) orelse return error.EndOfStream;
                    const byte1 = try in.takeByte();
                    const byte2 = try in.takeByte();
                    switch (size_format) {
                        0, 1 => {
                            regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4);
                            compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2);
                        },
                        2 => {
                            remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
                            const byte3 = try in.takeByte();
                            regenerated_size = (byte0 >> 4) +
                                (@as(u20, byte1) << 4) +
                                ((@as(u20, byte2) & 0b00000011) << 12);
                            compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, byte3) << 6);
                        },
                        3 => {
                            remaining.* = remaining.subtract(2) orelse return error.EndOfStream;
                            const byte3 = try in.takeByte();
                            const byte4 = try in.takeByte();
                            regenerated_size = (byte0 >> 4) +
                                (@as(u20, byte1) << 4) +
                                ((@as(u20, byte2) & 0b00111111) << 12);
                            compressed_size = ((byte2 & 0b11000000) >> 6) +
                                (@as(u18, byte3) << 2) +
                                (@as(u18, byte4) << 10);
                        },
                    }
                },
            }

            return .{
                .block_type = block_type,
                .size_format = size_format,
                .regenerated_size = regenerated_size,
                .compressed_size = compressed_size,
            };
        }
    };

    pub const BlockType = enum(u2) {
        raw,
        rle,
        compressed,
        treeless,
    };

    pub const HuffmanTree = struct {
        max_bit_count: u4,
        symbol_count_minus_one: u8,
        nodes: [256]PrefixedSymbol,

        pub const PrefixedSymbol = struct {
            symbol: u8,
            prefix: u16,
            weight: u4,
        };

        pub const Result = union(enum) {
            symbol: u8,
            index: usize,
        };
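
        // Editor's note: only the first symbol_count_minus_one + 1 entries of
        // `nodes` are meaningful; there is one per symbol with a non-zero
        // weight, sorted by ascending weight. A non-zero weight w corresponds
        // to a code of max_bit_count + 1 - w bits (see weightToBitCount), and
        // weight 0 marks a symbol that does not occur. `query` scans backwards
        // from `index` through entries sharing one weight, returning the
        // symbol whose prefix matches, or the highest index of the group with
        // the next smaller weight (the next longer code length).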
        pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{HuffmanTreeIncomplete}!Result {
            var node = self.nodes[index];
            const weight = node.weight;
            var i: usize = index;
            while (node.weight == weight) {
                if (node.prefix == prefix) return .{ .symbol = node.symbol };
                if (i == 0) return error.HuffmanTreeIncomplete;
                i -= 1;
                node = self.nodes[i];
            }
            return .{ .index = i };
        }

        pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 {
            return if (weight == 0) 0 else ((max_bit_count + 1) - weight);
        }

        pub const DecodeError = Reader.Error || error{
            MalformedHuffmanTree,
            MalformedFseTable,
            MalformedAccuracyLog,
            EndOfStream,
            MissingStartBit,
        };

        pub fn decode(in: *Reader, remaining: *Limit) HuffmanTree.DecodeError!HuffmanTree {
            remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
            const header = try in.takeByte();
            if (header < 128) {
                return decodeFse(in, remaining, header);
            } else {
                return decodeDirect(in, remaining, header - 127);
            }
        }

        fn decodeDirect(
            in: *Reader,
            remaining: *Limit,
            encoded_symbol_count: usize,
        ) HuffmanTree.DecodeError!HuffmanTree {
            var weights: [256]u4 = undefined;
            const weights_byte_count = (encoded_symbol_count + 1) / 2;
            remaining.* = remaining.subtract(weights_byte_count) orelse return error.EndOfStream;
            for (0..weights_byte_count) |i| {
                const byte = try in.takeByte();
                weights[2 * i] = @as(u4, @intCast(byte >> 4));
                weights[2 * i + 1] = @as(u4, @intCast(byte & 0xF));
            }
            const symbol_count = encoded_symbol_count + 1;
            return build(&weights, symbol_count);
        }

        fn decodeFse(
            in: *Reader,
            remaining: *Limit,
            compressed_size: usize,
        ) HuffmanTree.DecodeError!HuffmanTree {
            var weights: [256]u4 = undefined;
            remaining.* = remaining.subtract(compressed_size) orelse return error.EndOfStream;
            const compressed_buffer = try in.take(compressed_size);
            var bit_reader: BitReader = .{ .bytes = compressed_buffer };
            var entries: [1 << 6]Table.Fse = undefined;
            const table_size = try Table.decode(&bit_reader, 256, 6, &entries);
            const accuracy_log = std.math.log2_int_ceil(usize, table_size);
            const remaining_buffer = bit_reader.bytes[bit_reader.index..];
            const symbol_count = try assignWeights(remaining_buffer, accuracy_log, &entries, &weights);
            return build(&weights, symbol_count);
        }

        fn assignWeights(
            huff_bits_buffer: []const u8,
            accuracy_log: u16,
            entries: *[1 << 6]Table.Fse,
            weights: *[256]u4,
        ) !usize {
            var huff_bits = try ReverseBitReader.init(huff_bits_buffer);

            var i: usize = 0;
            var even_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log);
            var odd_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log);

            while (i < 254) {
                const even_data = entries[even_state];
                var read_bits: u16 = 0;
                const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable;
                weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree;
                i += 1;
                if (read_bits < even_data.bits) {
                    weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree;
                    i += 1;
                    break;
                }
                even_state = even_data.baseline + even_bits;

                read_bits = 0;
                const odd_data = entries[odd_state];
                const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch unreachable;
                weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree;
                i += 1;
                if (read_bits < odd_data.bits) {
                    if (i == 255) return error.MalformedHuffmanTree;
                    weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree;
                    i += 1;
                    break;
                }
                odd_state = odd_data.baseline + odd_bits;
            } else return error.MalformedHuffmanTree;

            if (!huff_bits.isEmpty()) {
                return error.MalformedHuffmanTree;
            }

            return i + 1; // stream contains all but the last symbol
        }
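
        // Illustrative worked example (editor's note, not part of the original
        // source): with decoded weights .{ 1, 1 } for symbols 0 and 1 and a
        // symbol_count of 3, `build` computes weight_power_sum = 1 + 1 = 2,
        // max_bit_count = 2, and derives the last symbol's weight as
        // log2(4 - 2) + 1 = 2. Via `weightToBitCount`, symbols 0 and 1 then
        // get 2-bit codes and symbol 2 a 1-bit code, and `assignSymbols` below
        // hands them the prefixes 0, 1, and 1 respectively.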
        fn assignSymbols(weight_sorted_prefixed_symbols: []PrefixedSymbol, weights: [256]u4) usize {
            for (0..weight_sorted_prefixed_symbols.len) |i| {
                weight_sorted_prefixed_symbols[i] = .{
                    .symbol = @as(u8, @intCast(i)),
                    .weight = undefined,
                    .prefix = undefined,
                };
            }

            std.mem.sort(
                PrefixedSymbol,
                weight_sorted_prefixed_symbols,
                weights,
                lessThanByWeight,
            );

            var prefix: u16 = 0;
            var prefixed_symbol_count: usize = 0;
            var sorted_index: usize = 0;
            const symbol_count = weight_sorted_prefixed_symbols.len;
            while (sorted_index < symbol_count) {
                var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
                const weight = weights[symbol];
                if (weight == 0) {
                    sorted_index += 1;
                    continue;
                }

                while (sorted_index < symbol_count) : ({
                    sorted_index += 1;
                    prefixed_symbol_count += 1;
                    prefix += 1;
                }) {
                    symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
                    if (weights[symbol] != weight) {
                        prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1;
                        break;
                    }
                    weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol;
                    weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix;
                    weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight;
                }
            }
            return prefixed_symbol_count;
        }

        fn build(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!HuffmanTree {
            var weight_power_sum_big: u32 = 0;
            for (weights[0 .. symbol_count - 1]) |value| {
                weight_power_sum_big += (@as(u16, 1) << value) >> 1;
            }
            if (weight_power_sum_big >= 1 << 11) return error.MalformedHuffmanTree;
            const weight_power_sum = @as(u16, @intCast(weight_power_sum_big));

            // advance to next power of two (even if weight_power_sum is a power of 2)
            // TODO: is it valid to have weight_power_sum == 0?
            const max_number_of_bits = if (weight_power_sum == 0) 1 else std.math.log2_int(u16, weight_power_sum) + 1;
            const next_power_of_two = @as(u16, 1) << max_number_of_bits;
            weights[symbol_count - 1] = std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1;

            var weight_sorted_prefixed_symbols: [256]PrefixedSymbol = undefined;
            const prefixed_symbol_count = assignSymbols(weight_sorted_prefixed_symbols[0..symbol_count], weights.*);
            const tree: HuffmanTree = .{
                .max_bit_count = max_number_of_bits,
                .symbol_count_minus_one = @as(u8, @intCast(prefixed_symbol_count - 1)),
                .nodes = weight_sorted_prefixed_symbols,
            };
            return tree;
        }

        fn lessThanByWeight(
            weights: [256]u4,
            lhs: PrefixedSymbol,
            rhs: PrefixedSymbol,
        ) bool {
            // NOTE: this function relies on the use of a stable sorting algorithm,
            // otherwise a special case of if (weights[lhs] == weights[rhs]) return lhs < rhs;
            // should be added
            return weights[lhs.symbol] < weights[rhs.symbol];
        }
    };

    pub const StreamCount = enum { one, four };

    pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount {
        return switch (block_type) {
            .raw, .rle => .one,
            .compressed, .treeless => if (size_format == 0) .one else .four,
        };
    }
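
    // Editor's note: `decode` below fills `buffer` according to the block type:
    // .raw copies regenerated_size literal bytes verbatim, .rle stores the
    // single repeated byte, and .compressed/.treeless read compressed_size
    // bytes (less any Huffman tree description) and split them into one or
    // four streams via Streams.decode.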
    pub const DecodeError = error{
        /// Invalid header.
        MalformedLiteralsHeader,
        /// Decoding errors.
        MalformedLiteralsSection,
        /// Compressed literals have invalid accuracy.
        MalformedAccuracyLog,
        /// Compressed literals have invalid FSE table.
        MalformedFseTable,
        /// Failed decoding a Huffman tree.
        MalformedHuffmanTree,
        /// Not enough bytes to complete the section.
        EndOfStream,
        ReadFailed,
        MissingStartBit,
    };

    pub fn decode(in: *Reader, remaining: *Limit, buffer: []u8) DecodeError!LiteralsSection {
        const header = try Header.decode(in, remaining);
        switch (header.block_type) {
            .raw => {
                if (buffer.len < header.regenerated_size) return error.MalformedLiteralsSection;
                remaining.* = remaining.subtract(header.regenerated_size) orelse return error.EndOfStream;
                try in.readSliceAll(buffer[0..header.regenerated_size]);
                return .{
                    .header = header,
                    .huffman_tree = null,
                    .streams = .{ .one = buffer },
                };
            },
            .rle => {
                remaining.* = remaining.subtract(1) orelse return error.EndOfStream;
                buffer[0] = try in.takeByte();
                return .{
                    .header = header,
                    .huffman_tree = null,
                    .streams = .{ .one = buffer[0..1] },
                };
            },
            .compressed, .treeless => {
                const before_remaining = remaining.*;
                const huffman_tree = if (header.block_type == .compressed)
                    try HuffmanTree.decode(in, remaining)
                else
                    null;
                const huffman_tree_size = @intFromEnum(before_remaining) - @intFromEnum(remaining.*);
                const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
                    return error.MalformedLiteralsSection;

                if (total_streams_size > buffer.len) return error.MalformedLiteralsSection;
                remaining.* = remaining.subtract(total_streams_size) orelse return error.EndOfStream;
                try in.readSliceAll(buffer[0..total_streams_size]);
                const stream_data = buffer[0..total_streams_size];

                const streams = try Streams.decode(header.size_format, stream_data);

                return .{
                    .header = header,
                    .huffman_tree = huffman_tree,
                    .streams = streams,
                };
            },
        }
    }
};
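
For orientation, a minimal sketch of how a decoded Header relates to the rest of the section. It assumes the struct above is in scope as `LiteralsSection` and that `header` was produced by `Header.decode`; the helper name is hypothetical and the import path of the enclosing module is not shown in this listing.

const std = @import("std");

// Hypothetical helper, not part of the module above.
fn describe(header: LiteralsSection.Header) void {
    // streamCount mirrors the switch in the source: raw/rle blocks and
    // size_format == 0 use a single stream, everything else uses four.
    const count = LiteralsSection.streamCount(header.size_format, header.block_type);

    // Note that compressed_size is only set for .compressed and .treeless
    // blocks; Header.decode leaves it null for .raw and .rle.
    std.debug.print("{s}: {d} literal bytes, {s} stream(s)\n", .{
        @tagName(header.block_type),
        header.regenerated_size,
        @tagName(count),
    });
}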