Function utf8ToUtf16LeImpl [src]

Prototype

pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize

Parameters

utf16le: []u16, utf8: []const u8, surrogates: Surrogates

Source

/// Transcodes `utf8` into little-endian UTF-16 code units written to `utf16le`
/// and returns the number of `u16` units written.
///
/// `surrogates` picks, at compile time, which view validates and iterates the
/// input: `Utf8View` for `.cannot_encode_surrogate_half`, `Wtf8View` for
/// `.can_encode_surrogate_half`. An error returned by the selected view's
/// `init` is propagated to the caller.
///
/// This function performs no capacity check on `utf16le`; it indexes the slice
/// directly, so the destination must already be large enough for the output.
pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize {
    var written: usize = 0;
    var tail = utf8;

    // Fast path: widen a leading run of pure-ASCII bytes one vector at a time.
    // Skipped entirely when no vector length is suggested for `u16`.
    ascii_prefix: {
        const lanes = std.simd.suggestVectorLength(u16) orelse break :ascii_prefix;
        const ByteVec = @Vector(lanes, u8);
        const WideVec = @Vector(lanes, u16);
        const high_bit: ByteVec = @splat(0x80);

        while (tail.len >= lanes) {
            const bytes: ByteVec = tail[0..lanes].*;
            // Any byte with its top bit set is a non-ASCII code unit; hand the
            // rest of the input over to the codepoint loop below.
            if (@reduce(.Or, (bytes & high_bit) == high_bit)) break;

            utf16le[written..][0..lanes].* = mem.nativeToLittle(WideVec, bytes);
            written += lanes;
            tail = tail[lanes..];
        }
    }

    // Validate whatever the fast path did not consume and walk it one
    // codepoint at a time.
    const View = switch (surrogates) {
        .cannot_encode_surrogate_half => Utf8View,
        .can_encode_surrogate_half => Wtf8View,
    };
    const view = try View.init(tail);

    var codepoints = view.iterator();
    while (codepoints.nextCodepoint()) |cp| {
        if (cp < 0x10000) {
            // Fits in a single UTF-16 code unit.
            utf16le[written] = mem.nativeToLittle(u16, @intCast(cp));
            written += 1;
            continue;
        }

        // Values of 0x10000 and above are split into a high/low surrogate pair.
        const lead = @as(u16, @intCast((cp - 0x10000) >> 10)) + 0xD800;
        const trail = @as(u16, @intCast(cp & 0x3FF)) + 0xDC00;
        utf16le[written..][0..2].* = .{
            mem.nativeToLittle(u16, lead),
            mem.nativeToLittle(u16, trail),
        };
        written += 2;
    }

    return written;
}