Function utf8CountCodepoints [src]

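Returns the number of Unicode code points encoded in the supplied UTF-8 byte slice. The input may be any `[]const u8`, not only a string literal. Returns `error.TruncatedInput` if a multi-byte sequence is cut off at the end of the slice; errors from decoding an invalid sequence are propagated.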

Prototype

pub fn utf8CountCodepoints(s: []const u8) !usize

Parameters

s: []const u8

Source

/// Counts the Unicode code points encoded in the UTF-8 byte slice `s`.
///
/// Returns `error.TruncatedInput` when the lead byte at the current position
/// announces a sequence that extends past the end of `s`. Errors reported by
/// `utf8ByteSequenceLength` and `utf8Decode` are propagated unchanged.
pub fn utf8CountCodepoints(s: []const u8) !usize {
    const word_size = @sizeOf(usize);
    // 0x80 replicated into every byte of a usize; a non-zero AND with a word
    // means at least one byte in that word has its high bit set (non-ASCII).
    const high_bit_mask = 0x80 * (std.math.maxInt(usize) / 0xff);

    var count: usize = 0;
    var pos: usize = 0;

    while (pos < s.len) {
        // ASCII fast path: consume whole machine words while every byte in
        // the word is ASCII, counting one code point per byte.
        while (pos + word_size <= s.len) {
            const word = mem.readInt(usize, s[pos..][0..word_size], native_endian);
            if (word & high_bit_mask != 0) break;
            count += word_size;
            pos += word_size;
        }

        // The fast path may have consumed the rest of the input.
        if (pos >= s.len) break;

        // Slow path: handle exactly one sequence starting at `pos`.
        const seq_len = try utf8ByteSequenceLength(s[pos]);
        const end = pos + seq_len;
        if (end > s.len) return error.TruncatedInput;

        // A one-byte sequence is plain ASCII and needs no validation;
        // anything longer is decoded purely to validate it.
        if (seq_len != 1) {
            _ = try utf8Decode(s[pos..end]);
        }

        pos = end;
        count += 1;
    }

    return count;
}