struct Iterator

Iterator over entries in the tar file represented by reader.
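
A minimal usage sketch, not part of the library itself: it assumes `reader` is an already-initialized *std.Io.Reader positioned at the start of a tar stream, and the function name listEntries is illustrative.

const std = @import("std");

// Sketch: walk a tar stream and print each entry's name and size.
fn listEntries(reader: *std.Io.Reader) !void {
    // The caller owns both name buffers; std.fs.max_path_bytes matches
    // file system path limits, as suggested by Iterator.Options.
    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;

    var iter = std.tar.Iterator.init(reader, .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
    });

    while (try iter.next()) |file| {
        // Entry content not consumed here is skipped automatically on the
        // following call to `next`.
        std.debug.print("{s} ({d} bytes)\n", .{ file.name, file.size });
    }
}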

Fields

reader: *std.Io.Reader
diagnostics: ?*Diagnostics = null
header_buffer: [Header.SIZE]u8 = undefined
file_name_buffer: []u8
link_name_buffer: []u8
padding: usize = 0
unread_file_bytes: u64 = 0

Members

Options
init
File
next
streamRemaining

Source

pub const Iterator = struct {
    reader: *std.Io.Reader,
    diagnostics: ?*Diagnostics = null,

    // buffers for header and file attributes
    header_buffer: [Header.SIZE]u8 = undefined,
    file_name_buffer: []u8,
    link_name_buffer: []u8,

    // bytes of padding to the end of the block
    padding: usize = 0,
    // bytes of file content not consumed in the last `next` iteration
    unread_file_bytes: u64 = 0,

    /// Options for iterator.
    /// Buffers should be provided by the caller.
    pub const Options = struct {
        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
        file_name_buffer: []u8,
        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
        link_name_buffer: []u8,
        /// Collects error messages during unpacking
        diagnostics: ?*Diagnostics = null,
    };

    /// Iterates over files in tar archive.
    /// `next` returns each file in tar archive.
    pub fn init(reader: *std.Io.Reader, options: Options) Iterator {
        return .{
            .reader = reader,
            .diagnostics = options.diagnostics,
            .file_name_buffer = options.file_name_buffer,
            .link_name_buffer = options.link_name_buffer,
        };
    }

    pub const File = struct {
        name: []const u8, // name of file, symlink or directory
        link_name: []const u8, // target name of symlink
        size: u64 = 0, // size of the file in bytes
        mode: u32 = 0,
        kind: FileKind = .file,
    };

    fn readHeader(self: *Iterator) !?Header {
        if (self.padding > 0) {
            try self.reader.discardAll(self.padding);
        }
        const n = try self.reader.readSliceShort(&self.header_buffer);
        if (n == 0) return null;
        if (n < Header.SIZE) return error.UnexpectedEndOfStream;
        const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
        if (try header.checkChksum() == 0) return null;
        return header;
    }

    fn readString(self: *Iterator, size: usize, buffer: []u8) ![]const u8 {
        if (size > buffer.len) return error.TarInsufficientBuffer;
        const buf = buffer[0..size];
        try self.reader.readSliceAll(buf);
        return nullStr(buf);
    }

    fn newFile(self: *Iterator) File {
        return .{
            .name = self.file_name_buffer[0..0],
            .link_name = self.link_name_buffer[0..0],
        };
    }

    // Number of padding bytes in the last file block.
    fn blockPadding(size: u64) usize {
        const block_rounded = std.mem.alignForward(u64, size, Header.SIZE); // size rounded to the block boundary
        return @intCast(block_rounded - size);
    }

    /// Iterates through the tar archive as if it is a series of files.
    /// Internally, the tar format often uses entries (header with optional
    /// content) to add meta data that describes the next file. These
    /// entries should not normally be visible to the outside. As such, this
    /// loop iterates through one or more entries until it collects all the
    /// file attributes.
    pub fn next(self: *Iterator) !?File {
        if (self.unread_file_bytes > 0) {
            // If file content was not consumed by caller
            try self.reader.discardAll64(self.unread_file_bytes);
            self.unread_file_bytes = 0;
        }
        var file: File = self.newFile();

        while (try self.readHeader()) |header| {
            const kind = header.kind();
            const size: u64 = try header.size();
            self.padding = blockPadding(size);

            switch (kind) {
                // File types to return upstream
                .directory, .normal, .symbolic_link => {
                    file.kind = switch (kind) {
                        .directory => .directory,
                        .normal => .file,
                        .symbolic_link => .sym_link,
                        else => unreachable,
                    };
                    file.mode = try header.mode();

                    // set file attributes if not already set by prefix/extended headers
                    if (file.size == 0) {
                        file.size = size;
                    }
                    if (file.link_name.len == 0) {
                        file.link_name = try header.linkName(self.link_name_buffer);
                    }
                    if (file.name.len == 0) {
                        file.name = try header.fullName(self.file_name_buffer);
                    }

                    self.padding = blockPadding(file.size);
                    self.unread_file_bytes = file.size;
                    return file;
                },
                // Prefix header types
                .gnu_long_name => {
                    file.name = try self.readString(@intCast(size), self.file_name_buffer);
                },
                .gnu_long_link => {
                    file.link_name = try self.readString(@intCast(size), self.link_name_buffer);
                },
                .extended_header => {
                    // Use just attributes from last extended header.
                    file = self.newFile();

                    var rdr: PaxIterator = .{
                        .reader = self.reader,
                        .size = @intCast(size),
                    };
                    while (try rdr.next()) |attr| {
                        switch (attr.kind) {
                            .path => {
                                file.name = try attr.value(self.file_name_buffer);
                            },
                            .linkpath => {
                                file.link_name = try attr.value(self.link_name_buffer);
                            },
                            .size => {
                                var buf: [pax_max_size_attr_len]u8 = undefined;
                                file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
                            },
                        }
                    }
                },
                // Ignored header type
                .global_extended_header => {
                    self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
                },
                // All others are unsupported header types
                else => {
                    const d = self.diagnostics orelse return error.TarUnsupportedHeader;
                    try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                        .file_name = try d.allocator.dupe(u8, header.name()),
                        .file_type = kind,
                    } });
                    if (kind == .gnu_sparse) {
                        try self.skipGnuSparseExtendedHeaders(header);
                    }
                    self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
                },
            }
        }
        return null;
    }

    pub fn streamRemaining(it: *Iterator, file: File, w: *std.Io.Writer) std.Io.Reader.StreamError!void {
        try it.reader.streamExact64(w, file.size);
        it.unread_file_bytes = 0;
    }

    fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void {
        var is_extended = header.bytes[482] > 0;
        while (is_extended) {
            var buf: [Header.SIZE]u8 = undefined;
            try self.reader.readSliceAll(&buf);
            is_extended = buf[504] > 0;
        }
    }
};
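
A hedged sketch of consuming entry content with `streamRemaining`: the helper below (the name extractFirstFile is illustrative, not part of std.tar) streams the first regular file it finds into a caller-supplied *std.Io.Writer.

const std = @import("std");

// Sketch only: copy the first regular file in the archive to `out`.
// `reader` and `out` are assumed to be set up by the caller.
fn extractFirstFile(reader: *std.Io.Reader, out: *std.Io.Writer) !void {
    var file_name_buffer: [std.fs.max_path_bytes]u8 = undefined;
    var link_name_buffer: [std.fs.max_path_bytes]u8 = undefined;

    var iter = std.tar.Iterator.init(reader, .{
        .file_name_buffer = &file_name_buffer,
        .link_name_buffer = &link_name_buffer,
    });

    while (try iter.next()) |file| {
        if (file.kind != .file) continue;
        // streamRemaining copies exactly file.size bytes of content to the
        // writer and marks the entry as fully consumed, so a later `next`
        // call starts at the following header.
        try iter.streamRemaining(file, out);
        return;
    }
}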