Function readSourceFileToEndAlloc [src]

Prototype

pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: ?usize) ![:0]u8

Parameters

gpa: Allocator, input: std.fs.File, size_hint: ?usize

Source

  /// Reads all of `input` into a freshly allocated, 0-terminated buffer and
  /// returns it as UTF-8 source text.
  ///
  /// * `gpa` allocates the returned buffer (and a temporary scratch buffer on
  ///   the UTF-16 path). The caller owns the returned slice and must free it
  ///   with `gpa`.
  /// * `size_hint` is forwarded unchanged to `readToEndAllocOptions`.
  ///
  /// Byte Order Mark handling:
  /// * UTF-32 (either endianness) or UTF-16 big endian BOM:
  ///   `error.UnsupportedEncoding`.
  /// * UTF-16 little endian BOM: the contents are transcoded to UTF-8.
  ///   An odd byte length yields `error.InvalidEncoding`; malformed surrogate
  ///   sequences yield `error.UnsupportedEncoding`.
  /// * Anything else is returned as read.
  ///
  /// Other errors from reading or allocating are propagated unchanged.
  pub fn readSourceFileToEndAlloc(gpa: Allocator, input: std.fs.File, size_hint: ?usize) ![:0]u8 {
    const source_code = input.readToEndAllocOptions(
        gpa,
        max_src_size, // presumably the upper bound on bytes read; defined elsewhere in this file
        size_hint,
        @alignOf(u8),
        0,
    ) catch |err| switch (err) {
        // Asserted to be impossible for the file handle passed in here.
        error.ConnectionResetByPeer => unreachable,
        error.ConnectionTimedOut => unreachable,
        error.NotOpenForReading => unreachable,
        else => |e| return e,
    };
    // From here on, every error return releases the raw buffer exactly once.
    errdefer gpa.free(source_code);

    // Detect unsupported file types with their Byte Order Mark.
    // The UTF-32 little endian BOM begins with the UTF-16 little endian BOM,
    // so it has to be rejected before the UTF-16 LE check below.
    const unsupported_boms = [_][]const u8{
        "\xff\xfe\x00\x00", // UTF-32 little endian
        "\xfe\xff\x00\x00", // UTF-32 big endian
        "\xfe\xff", // UTF-16 big endian
    };
    for (unsupported_boms) |bom| {
        if (std.mem.startsWith(u8, source_code, bom)) {
            return error.UnsupportedEncoding;
        }
    }

    // If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
    if (std.mem.startsWith(u8, source_code, "\xff\xfe")) {
        // UTF-16 code units are two bytes each; an odd length cannot be valid.
        if (source_code.len % 2 != 0) return error.InvalidEncoding;
        // TODO: after wrangle-writer-buffering branch is merged,
        // avoid this unnecessary allocation
        // The raw buffer is only byte-aligned, so copy it into u16-aligned
        // storage before handing it to the UTF-16 decoder.
        const aligned_copy = try gpa.alloc(u16, source_code.len / 2);
        defer gpa.free(aligned_copy);
        @memcpy(std.mem.sliceAsBytes(aligned_copy), source_code);
        const source_code_utf8 = std.unicode.utf16LeToUtf8AllocZ(gpa, aligned_copy) catch |err| switch (err) {
            // These arms must `return`. Yielding the error as a value would
            // let execution reach `gpa.free(source_code)` below, and the
            // `errdefer` above would then free the same buffer a second time
            // once the error is finally returned.
            error.DanglingSurrogateHalf => return error.UnsupportedEncoding,
            error.ExpectedSecondSurrogateHalf => return error.UnsupportedEncoding,
            error.UnexpectedSecondSurrogateHalf => return error.UnsupportedEncoding,
            else => |e| return e,
        };
        // Success: the UTF-8 copy replaces the raw UTF-16 bytes.
        gpa.free(source_code);
        return source_code_utf8;
    }

    return source_code;
}