Function fmtUtf8 [src]

Return a Formatter for a (potentially ill-formed) UTF-8 string. Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD) according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of the Unicode Standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder.

Prototype

pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8)

Parameters

utf8: []const u8

Example

test fmtUtf8 {
    // Each case pairs the raw bytes handed to `fmtUtf8` with the text the
    // formatter is expected to emit for them under the "{}" format.
    const Case = struct {
        input: []const u8,
        expected: []const u8,
    };

    const cases = [_]Case{
        // Well-formed input is expected to be passed through unchanged.
        .{ .input = "", .expected = "" },
        .{ .input = "foo", .expected = "foo" },
        .{ .input = "𐐷", .expected = "𐐷" },

        // Table 3-8. U+FFFD for Non-Shortest Form Sequences
        .{ .input = "\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A", .expected = "��������A" },

        // Table 3-9. U+FFFD for Ill-Formed Sequences for Surrogates
        .{ .input = "\xED\xA0\x80\xED\xBF\xBF\xED\xAFA", .expected = "��������A" },

        // Table 3-10. U+FFFD for Other Ill-Formed Sequences
        .{ .input = "\xF4\x91\x92\x93\xFFA\x80\xBFB", .expected = "�����A��B" },

        // Table 3-11. U+FFFD for Truncated Sequences
        .{ .input = "\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA", .expected = "����A" },
    };

    // Cases run in declaration order; the first mismatch aborts the test.
    for (cases) |case| {
        try testing.expectFmt(case.expected, "{}", .{fmtUtf8(case.input)});
    }
}

Source

/// Wrap `utf8` in a `std.fmt.Formatter(formatUtf8)` so it can be passed as a
/// format argument.
///
/// The input may be ill-formed UTF-8. Per this function's documentation,
/// ill-formed byte sequences are rendered as the replacement character
/// (U+FFFD), following "U+FFFD Substitution of Maximal Subparts" from
/// Chapter 3 of the Unicode Standard and the WHATWG UTF-8 decoder
/// (https://encoding.spec.whatwg.org/#utf-8-decoder). That substitution is
/// carried out by `formatUtf8`, not by this function.
///
/// This function only stores the `utf8` slice in the formatter's `data`
/// field; no copy is made here, so the referenced bytes must remain valid
/// for as long as the returned formatter is used.
pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8) {
    const formatter: std.fmt.Formatter(formatUtf8) = .{ .data = utf8 };
    return formatter;
}