Function fmtUtf8 [src]
Return a Formatter for a (potentially ill-formed) UTF-8 string.
Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
the Unicode Standard, and as specified by the UTF-8 decoder of the WHATWG Encoding Standard (https://encoding.spec.whatwg.org/#utf-8-decoder).
Prototype
pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8)
Parameters
utf8: []const u8
Example
test fmtUtf8 {
    // Each case pairs the text expected from `{}` formatting with the raw
    // bytes handed to `fmtUtf8`.
    const Case = struct {
        expected: []const u8,
        input: []const u8,
    };

    const cases = [_]Case{
        // Empty and well-formed inputs.
        .{ .expected = "", .input = "" },
        .{ .expected = "foo", .input = "foo" },
        .{ .expected = "𐐷", .input = "𐐷" },
        // Table 3-8. U+FFFD for Non-Shortest Form Sequences
        .{ .expected = "��������A", .input = "\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A" },
        // Table 3-9. U+FFFD for Ill-Formed Sequences for Surrogates
        .{ .expected = "��������A", .input = "\xED\xA0\x80\xED\xBF\xBF\xED\xAFA" },
        // Table 3-10. U+FFFD for Other Ill-Formed Sequences
        .{ .expected = "�����A��B", .input = "\xF4\x91\x92\x93\xFFA\x80\xBFB" },
        // Table 3-11. U+FFFD for Truncated Sequences
        .{ .expected = "����A", .input = "\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA" },
    };

    for (cases) |case| {
        try testing.expectFmt(case.expected, "{}", .{fmtUtf8(case.input)});
    }
}
Source
/// Wrap `utf8` in a `std.fmt.Formatter` whose output is produced by `formatUtf8`.
///
/// `utf8` may be ill-formed; per this function's documentation, ill-formed
/// byte sequences are rendered as U+FFFD when the value is formatted.
/// The slice is stored in the returned formatter as-is (nothing is copied
/// here), so it should remain valid for as long as the formatter is used.
pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8) {
    const Utf8Formatter = std.fmt.Formatter(formatUtf8);
    return Utf8Formatter{ .data = utf8 };
}