Source
/// Reads all of `input` into a freshly allocated, 0-terminated byte buffer
/// and returns it. The buffer is allocated with `gpa`; on success it is handed
/// to the caller, who must free it with the same allocator.
///
/// `max_src_size` is passed to the read call as the size limit and `size_hint`
/// is forwarded unchanged.
///
/// Encoding handling, based on the leading Byte Order Mark:
/// * UTF-32 (either endianness) and UTF-16 big endian BOMs are rejected with
///   `error.UnsupportedEncoding`.
/// * A UTF-16 little endian BOM causes the whole buffer (BOM included) to be
///   transcoded to UTF-8; the transcoded buffer is returned instead and the
///   original bytes are freed. An odd byte length yields
///   `error.InvalidEncoding`; malformed surrogate sequences yield
///   `error.UnsupportedEncoding`.
/// * Anything else is returned as read.
///
/// All other read and allocation errors are propagated to the caller.
pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: ?usize) ![:0]u8 {
    const source_code = input.readToEndAllocOptions(
        gpa,
        max_src_size,
        size_hint,
        @alignOf(u8),
        0,
    ) catch |err| switch (err) {
        error.ConnectionResetByPeer => unreachable,
        error.ConnectionTimedOut => unreachable,
        error.NotOpenForReading => unreachable,
        else => |e| return e,
    };
    // From here on, every error return releases the raw file contents.
    errdefer gpa.free(source_code);

    // Detect unsupported file types with their Byte Order Mark
    const unsupported_boms = [_][]const u8{
        "\xff\xfe\x00\x00", // UTF-32 little endian
        "\xfe\xff\x00\x00", // UTF-32 big endian
        "\xfe\xff", // UTF-16 big endian
    };
    for (unsupported_boms) |bom| {
        if (std.mem.startsWith(u8, source_code, bom)) {
            return error.UnsupportedEncoding;
        }
    }

    // If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
    if (std.mem.startsWith(u8, source_code, "\xff\xfe")) {
        if (source_code.len % 2 != 0) return error.InvalidEncoding;

        // TODO: after wrangle-writer-buffering branch is merged,
        // avoid this unnecessary allocation
        // The byte buffer is only u8-aligned, so copy it into u16 storage
        // before handing it to the UTF-16 decoder.
        const aligned_copy = try gpa.alloc(u16, source_code.len / 2);
        defer gpa.free(aligned_copy);
        @memcpy(std.mem.sliceAsBytes(aligned_copy), source_code);

        // Each prong must `return` the error rather than evaluate to it:
        // otherwise execution would continue to the manual free below and the
        // `errdefer` above would then free `source_code` a second time when
        // the error is finally returned.
        const source_code_utf8 = std.unicode.utf16LeToUtf8AllocZ(gpa, aligned_copy) catch |err| switch (err) {
            error.DanglingSurrogateHalf,
            error.ExpectedSecondSurrogateHalf,
            error.UnexpectedSecondSurrogateHalf,
            => return error.UnsupportedEncoding,
            else => |e| return e,
        };

        // Success: the UTF-8 copy replaces the original buffer. No error
        // return can follow, so the `errdefer` will not fire after this free.
        gpa.free(source_code);
        return source_code_utf8;
    }

    return source_code;
}