Function utf8CountCodepoints [src]
Returns the length of the supplied UTF-8 string, measured in Unicode
codepoints. Returns an error if a sequence is truncated or fails to decode.
Prototype
pub fn utf8CountCodepoints(s: []const u8) !usize
Parameters
s: []const u8
Source
/// Counts the Unicode codepoints encoded in the UTF-8 byte slice `s`.
///
/// Returns `error.TruncatedInput` when the lead byte at the current position
/// announces a sequence that runs past the end of `s`. Any error produced by
/// `utf8ByteSequenceLength` or `utf8Decode` is propagated unchanged.
pub fn utf8CountCodepoints(s: []const u8) !usize {
    const word_size = @sizeOf(usize);
    // 0x80 repeated in every byte of a usize: a non-zero AND against a word
    // means at least one byte in that word has its high bit set.
    const high_bits = 0x80 * (std.math.maxInt(usize) / 0xff);

    var count: usize = 0;
    var pos: usize = 0;
    while (pos < s.len) {
        // Fast path: consume whole machine words while every byte is ASCII,
        // counting one codepoint per byte.
        while (s.len >= pos + word_size) {
            const word = mem.readInt(usize, s[pos..][0..word_size], native_endian);
            if (word & high_bits != 0) break;
            count += word_size;
            pos += word_size;
        }

        // The fast path may have consumed the rest of the input.
        if (pos >= s.len) break;

        // Slow path: handle exactly one codepoint starting at `pos`.
        const seq_len = try utf8ByteSequenceLength(s[pos]);
        if (s.len < pos + seq_len) return error.TruncatedInput;

        // A one-byte sequence is ASCII and needs no validation; longer
        // sequences are decoded only to validate them, the value is discarded.
        if (seq_len != 1) {
            _ = try utf8Decode(s[pos..][0..seq_len]);
        }

        pos += seq_len;
        count += 1;
    }

    return count;
}