struct Iterator [src]
Iterator over entries in the tar file represented by reader.
Fields
reader: *std.Io.Reader
diagnostics: ?*Diagnostics = null
header_buffer: [Header.SIZE]u8 = undefined
file_name_buffer: []u8
link_name_buffer: []u8
padding: usize = 0
unread_file_bytes: u64 = 0
Members
- File (struct)
- init (Function)
- next (Function)
- Options (struct)
- streamRemaining (Function)
Source
pub const Iterator = struct {
    // Source of the raw tar byte stream.
    reader: *std.Io.Reader,
    // Optional collector for unsupported-header reports (see `next`).
    diagnostics: ?*Diagnostics = null,

    // Buffers for the header and file attributes.
    header_buffer: [Header.SIZE]u8 = undefined,
    file_name_buffer: []u8,
    link_name_buffer: []u8,

    // Bytes of padding to the end of the block; skipped before the next
    // header is read.
    padding: usize = 0,
    // Bytes of the file returned by the last `next` call that have not been
    // consumed yet.
    unread_file_bytes: u64 = 0,

    /// Options for iterator.
    /// Buffers should be provided by the caller.
    pub const Options = struct {
        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
        file_name_buffer: []u8,
        /// Use a buffer with length `std.fs.max_path_bytes` to match file system capabilities.
        link_name_buffer: []u8,
        /// Collects error messages during unpacking
        diagnostics: ?*Diagnostics = null,
    };

    /// Creates an iterator over the files in the tar archive read from `reader`.
    /// Call `next` repeatedly to obtain each file in the archive.
    /// The buffers in `options` are borrowed, not copied: the `name` and
    /// `link_name` slices of a returned `File` point into them.
    pub fn init(reader: *std.Io.Reader, options: Options) Iterator {
        return .{
            .reader = reader,
            .diagnostics = options.diagnostics,
            .file_name_buffer = options.file_name_buffer,
            .link_name_buffer = options.link_name_buffer,
        };
    }

    /// One archive member as reported by `next`.
    pub const File = struct {
        name: []const u8, // name of file, symlink or directory
        link_name: []const u8, // target name of symlink
        size: u64 = 0, // size of the file in bytes
        mode: u32 = 0,
        kind: FileKind = .file,
    };

    /// Skips the padding left over from the previous entry, then reads the
    /// next header block into `header_buffer`.
    /// Returns `null` when no more bytes are available or when
    /// `checkChksum` reports 0, and `error.UnexpectedEndOfStream` when the
    /// stream ends in the middle of a header.
    fn readHeader(self: *Iterator) !?Header {
        if (self.padding > 0) {
            try self.reader.discardAll(self.padding);
            // The padding is consumed; clear it so that a retry after an
            // error, or another `next` call after the end of the archive,
            // does not discard the same amount a second time.
            self.padding = 0;
        }
        const n = try self.reader.readSliceShort(&self.header_buffer);
        if (n == 0) return null;
        if (n < Header.SIZE) return error.UnexpectedEndOfStream;
        const header = Header{ .bytes = self.header_buffer[0..Header.SIZE] };
        if (try header.checkChksum() == 0) return null;
        return header;
    }

    /// Reads exactly `size` bytes from the reader into the front of `buffer`
    /// and returns the result of `nullStr` on those bytes.
    /// Fails with `error.TarInsufficientBuffer` when `buffer` is too small.
    fn readString(self: *Iterator, size: usize, buffer: []u8) ![]const u8 {
        if (size > buffer.len) return error.TarInsufficientBuffer;
        const buf = buffer[0..size];
        try self.reader.readSliceAll(buf);
        return nullStr(buf);
    }

    /// Returns a `File` with empty name and link name and default values for
    /// every other attribute.
    fn newFile(self: *Iterator) File {
        return .{
            .name = self.file_name_buffer[0..0],
            .link_name = self.link_name_buffer[0..0],
        };
    }

    // Number of padding bytes in the last file block.
    // Computed with a remainder instead of rounding `size` up to the block
    // boundary, so sizes close to `maxInt(u64)` cannot overflow.
    fn blockPadding(size: u64) usize {
        // The remainder is always smaller than the block size, so it fits in usize.
        const remainder: usize = @intCast(size % Header.SIZE);
        return if (remainder == 0) 0 else Header.SIZE - remainder;
    }

    /// Iterates through the tar archive as if it is a series of files.
    /// Internally, the tar format often uses entries (header with optional
    /// content) to add meta data that describes the next file. These
    /// entries should not normally be visible to the outside. As such, this
    /// loop iterates through one or more entries until it collects all
    /// file attributes.
    ///
    /// Returns `null` when `readHeader` reports the end of the archive.
    /// Any content of the previously returned file that the caller did not
    /// consume is discarded first.
    pub fn next(self: *Iterator) !?File {
        if (self.unread_file_bytes > 0) {
            // If file content was not consumed by caller
            try self.reader.discardAll64(self.unread_file_bytes);
            self.unread_file_bytes = 0;
        }
        var file: File = self.newFile();
        while (try self.readHeader()) |header| {
            const kind = header.kind();
            const size: u64 = try header.size();
            self.padding = blockPadding(size);

            switch (kind) {
                // File types to return upstream
                .directory, .normal, .symbolic_link => {
                    file.kind = switch (kind) {
                        .directory => .directory,
                        .normal => .file,
                        .symbolic_link => .sym_link,
                        else => unreachable,
                    };
                    file.mode = try header.mode();

                    // set file attributes if not already set by prefix/extended headers
                    if (file.size == 0) {
                        file.size = size;
                    }
                    if (file.link_name.len == 0) {
                        file.link_name = try header.linkName(self.link_name_buffer);
                    }
                    if (file.name.len == 0) {
                        file.name = try header.fullName(self.file_name_buffer);
                    }

                    // The size may come from an extended header, so the
                    // padding is recomputed from the final value.
                    self.padding = blockPadding(file.size);
                    self.unread_file_bytes = file.size;
                    return file;
                },
                // Prefix header types
                .gnu_long_name => {
                    // The size comes from the archive; a checked cast avoids
                    // illegal behavior where usize is narrower than u64.
                    const len = std.math.cast(usize, size) orelse return error.TarInsufficientBuffer;
                    file.name = try self.readString(len, self.file_name_buffer);
                },
                .gnu_long_link => {
                    const len = std.math.cast(usize, size) orelse return error.TarInsufficientBuffer;
                    file.link_name = try self.readString(len, self.link_name_buffer);
                },
                .extended_header => {
                    // Use just attributes from last extended header.
                    file = self.newFile();

                    // Checked narrowing to whatever integer type PaxIterator
                    // uses for its size.
                    const pax_size = std.math.cast(@FieldType(PaxIterator, "size"), size) orelse
                        return error.TarHeadersTooBig;
                    var rdr: PaxIterator = .{
                        .reader = self.reader,
                        .size = pax_size,
                    };
                    while (try rdr.next()) |attr| {
                        switch (attr.kind) {
                            .path => {
                                file.name = try attr.value(self.file_name_buffer);
                            },
                            .linkpath => {
                                file.link_name = try attr.value(self.link_name_buffer);
                            },
                            .size => {
                                var buf: [pax_max_size_attr_len]u8 = undefined;
                                file.size = try std.fmt.parseInt(u64, try attr.value(&buf), 10);
                            },
                        }
                    }
                },
                // Ignored header type
                .global_extended_header => {
                    self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
                },
                // All other are unsupported header types
                else => {
                    const d = self.diagnostics orelse return error.TarUnsupportedHeader;
                    {
                        // Scoped so the errdefer only covers the append: once
                        // the entry is stored, the diagnostics own the name.
                        const file_name = try d.allocator.dupe(u8, header.name());
                        errdefer d.allocator.free(file_name);
                        try d.errors.append(d.allocator, .{ .unsupported_file_type = .{
                            .file_name = file_name,
                            .file_type = kind,
                        } });
                    }
                    if (kind == .gnu_sparse) {
                        try self.skipGnuSparseExtendedHeaders(header);
                    }
                    self.reader.discardAll64(size) catch return error.TarHeadersTooBig;
                },
            }
        }
        return null;
    }

    /// Streams exactly `file.size` bytes from the underlying reader to `w`
    /// and marks the content of the current file as fully consumed.
    /// `file` is expected to be the value most recently returned by `next`.
    pub fn streamRemaining(it: *Iterator, file: File, w: *std.Io.Writer) std.Io.Reader.StreamError!void {
        try it.reader.streamExact64(w, file.size);
        it.unread_file_bytes = 0;
    }

    /// Reads and drops the chain of extension blocks that follows a GNU
    /// sparse header, stopping at the first block whose "is extended" flag
    /// byte is zero.
    fn skipGnuSparseExtendedHeaders(self: *Iterator, header: Header) !void {
        // Position of the "is extended" flag byte in the main header.
        const header_flag_offset = 482;
        // Position of the "is extended" flag byte in each extension block.
        const extension_flag_offset = 504;

        var is_extended = header.bytes[header_flag_offset] > 0;
        while (is_extended) {
            var buf: [Header.SIZE]u8 = undefined;
            try self.reader.readSliceAll(&buf);
            is_extended = buf[extension_flag_offset] > 0;
        }
    }
}