struct string_literal [src]
Alias for std.zig.string_literal
Members
- Error (union)
- parseAlloc (Function)
- parseCharLiteral (Function)
- ParsedCharLiteral (union)
- ParseError (Error Set)
- parseEscapeSequence (Function)
- parseWrite (Function)
- Result (union)
Source
const std = @import("../std.zig");
const assert = std.debug.assert;
const utf8Encode = std.unicode.utf8Encode;
/// Error set returned by `parseAlloc`, the high level entry point that does not
/// report detailed parse diagnostics.
pub const ParseError = error{
/// Allocation failed while building the parsed string.
OutOfMemory,
/// The literal could not be parsed; `parseAlloc` returns this for every
/// `Result.failure`, discarding the detailed `Error`.
InvalidLiteral,
};
/// Outcome of `parseCharLiteral` and `parseEscapeSequence`.
pub const ParsedCharLiteral = union(enum) {
/// The decoded value.
success: u21,
/// Describes what went wrong and, for most variants, at which index.
failure: Error,
};
/// Outcome of `parseWrite`.
pub const Result = union(enum) {
/// A closing '"' was reached; the decoded bytes before it have been written.
success,
/// Parsing stopped at the described problem.
failure: Error,
};
/// Describes why a string or character literal failed to parse.
///
/// Every variant except `empty_char_literal` carries an index into the raw
/// literal. The index may be equal to the literal's length when the problem is
/// that the input ended too early.
pub const Error = union(enum) {
    /// The character after backslash is missing or not recognized.
    invalid_escape_character: usize,
    /// Expected hex digit at this index.
    expected_hex_digit: usize,
    /// Unicode escape sequence had no digits with rbrace at this index.
    empty_unicode_escape_sequence: usize,
    /// Expected hex digit or '}' at this index.
    expected_hex_digit_or_rbrace: usize,
    /// Invalid unicode codepoint at this index.
    invalid_unicode_codepoint: usize,
    /// Expected '{' at this index.
    expected_lbrace: usize,
    /// Expected '}' at this index.
    expected_rbrace: usize,
    /// Expected '\'' at this index.
    expected_single_quote: usize,
    /// The character at this index cannot be represented without an escape sequence.
    invalid_character: usize,
    /// `''`. Not returned for string literals.
    empty_char_literal,

    /// Pairs an error with the literal it refers to so it can be rendered.
    const FormatMessage = struct {
        err: Error,
        raw_string: []const u8,

        /// Returns the byte at `index`, or null when `index` lies at or past
        /// the end of `raw_string`.
        fn byteAt(self: FormatMessage, index: usize) ?u8 {
            return if (index < self.raw_string.len) self.raw_string[index] else null;
        }
    };

    /// Writes a human readable description of `self.err` to `writer`.
    ///
    /// Messages that quote the offending byte look it up in `self.raw_string`.
    /// Because an error index can point one past the end of the literal, the
    /// lookup is bounds checked and an end-of-input message is written instead
    /// of indexing out of bounds.
    fn formatMessage(
        self: FormatMessage,
        comptime f: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = f;
        _ = options;
        switch (self.err) {
            .invalid_escape_character => |bad_index| if (self.byteAt(bad_index)) |c|
                try writer.print("invalid escape character: '{c}'", .{c})
            else
                try writer.writeAll("missing escape character"),
            .expected_hex_digit => |bad_index| if (self.byteAt(bad_index)) |c|
                try writer.print("expected hex digit, found '{c}'", .{c})
            else
                try writer.writeAll("expected hex digit, found end of input"),
            .empty_unicode_escape_sequence => try writer.writeAll(
                "empty unicode escape sequence",
            ),
            .expected_hex_digit_or_rbrace => |bad_index| if (self.byteAt(bad_index)) |c|
                try writer.print("expected hex digit or '}}', found '{c}'", .{c})
            else
                try writer.writeAll("expected hex digit or '}', found end of input"),
            .invalid_unicode_codepoint => try writer.writeAll(
                "unicode escape does not correspond to a valid unicode scalar value",
            ),
            .expected_lbrace => |bad_index| if (self.byteAt(bad_index)) |c|
                try writer.print("expected '{{', found '{c}'", .{c})
            else
                try writer.writeAll("expected '{', found end of input"),
            .expected_rbrace => |bad_index| if (self.byteAt(bad_index)) |c|
                try writer.print("expected '}}', found '{c}'", .{c})
            else
                try writer.writeAll("expected '}', found end of input"),
            .expected_single_quote => |bad_index| if (self.byteAt(bad_index)) |c|
                try writer.print("expected single quote ('), found '{c}'", .{c})
            else
                try writer.writeAll("expected single quote ('), found end of input"),
            .invalid_character => |bad_index| if (self.byteAt(bad_index)) |c|
                try writer.print("invalid byte in string or character literal: '{c}'", .{c})
            else
                try writer.writeAll("invalid byte in string or character literal"),
            .empty_char_literal => try writer.writeAll(
                "empty character literal",
            ),
        }
    }

    /// Returns a formatter that renders this error as a message.
    /// `raw_string` is the literal whose bytes the error's index refers to.
    pub fn fmt(self: @This(), raw_string: []const u8) std.fmt.Formatter(formatMessage) {
        return .{ .data = .{
            .err = self,
            .raw_string = raw_string,
        } };
    }

    /// Returns the index carried by the error, or 0 for `empty_char_literal`,
    /// which carries none.
    pub fn offset(err: Error) usize {
        return switch (err) {
            inline .invalid_escape_character,
            .expected_hex_digit,
            .empty_unicode_escape_sequence,
            .expected_hex_digit_or_rbrace,
            .invalid_unicode_codepoint,
            .expected_lbrace,
            .expected_rbrace,
            .expected_single_quote,
            .invalid_character,
            => |n| n,
            .empty_char_literal => 0,
        };
    }
};
/// Parses a character literal, including its surrounding single quotes.
///
/// Returns `.empty_char_literal` when `slice` is shorter than three bytes;
/// otherwise asserts that `slice` starts and ends with a single quote.
/// Returns a failure unless exactly one UTF-8 encoded codepoint or one escape
/// sequence sits between the quotes.
pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral {
    if (slice.len < 3) return .{ .failure = .empty_char_literal };
    assert(slice[0] == '\'');
    assert(slice[slice.len - 1] == '\'');

    switch (slice[1]) {
        '\\' => {
            var offset: usize = 1;
            const result = parseEscapeSequence(slice, &offset);
            // After a successful escape, the only byte left must be the closing quote.
            if (result == .success and (offset + 1 != slice.len or slice[offset] != '\''))
                return .{ .failure = .{ .expected_single_quote = offset } };
            return result;
        },
        0 => return .{ .failure = .{ .invalid_character = 1 } },
        else => {
            const inner = slice[1 .. slice.len - 1];
            const n = std.unicode.utf8ByteSequenceLength(inner[0]) catch return .{
                .failure = .{ .invalid_unicode_codepoint = 1 },
            };
            if (inner.len > n) return .{ .failure = .{ .expected_single_quote = 1 + n } };
            // The lead byte announces more bytes than are present between the
            // quotes. Reject it here; slicing `inner[0..n]` below would otherwise
            // go out of bounds.
            if (inner.len < n) return .{ .failure = .{ .invalid_unicode_codepoint = 1 } };
            const codepoint = switch (n) {
                1 => inner[0],
                2 => std.unicode.utf8Decode2(inner[0..2].*),
                3 => std.unicode.utf8Decode3(inner[0..3].*),
                4 => std.unicode.utf8Decode4(inner[0..4].*),
                else => unreachable,
            } catch return .{ .failure = .{ .invalid_unicode_codepoint = 1 } };
            return .{ .success = codepoint };
        },
    }
}
/// Decodes the escape sequence that starts at `slice[offset.*]`, which must be
/// a backslash (asserted).
///
/// On success the decoded value is returned and `offset.*` is advanced to the
/// index just past the escape sequence. On failure the returned `Error` holds
/// the index in `slice` at which the problem was found.
pub fn parseEscapeSequence(slice: []const u8, offset: *usize) ParsedCharLiteral {
    const backslash_index = offset.*;
    assert(backslash_index < slice.len);
    assert(slice[backslash_index] == '\\');

    const escape_index = backslash_index + 1;
    if (escape_index == slice.len)
        return .{ .failure = .{ .invalid_escape_character = escape_index } };

    // Index of the first byte after the escape character.
    const body_start = escape_index + 1;
    offset.* = body_start;

    // Single-character escapes map directly to one value.
    const simple: ?u21 = switch (slice[escape_index]) {
        'n' => '\n',
        'r' => '\r',
        '\\' => '\\',
        't' => '\t',
        '\'' => '\'',
        '"' => '"',
        else => null,
    };
    if (simple) |value| return .{ .success = value };

    switch (slice[escape_index]) {
        // `\xNN`: exactly two hex digits.
        'x' => {
            const end = body_start + 2;
            var byte: u8 = 0;
            var pos = body_start;
            while (pos != end) : (pos += 1) {
                if (pos >= slice.len) return .{ .failure = .{ .expected_hex_digit = pos } };
                const digit = std.fmt.charToDigit(slice[pos], 16) catch
                    return .{ .failure = .{ .expected_hex_digit = pos } };
                byte = byte * 16 + digit;
            }
            offset.* = end;
            return .{ .success = byte };
        },
        // `\u{N...}`: one or more hex digits, value at most 0x10ffff.
        'u' => {
            var pos = body_start;
            if (pos >= slice.len or slice[pos] != '{')
                return .{ .failure = .{ .expected_lbrace = pos } };
            pos += 1;
            if (pos >= slice.len)
                return .{ .failure = .{ .expected_hex_digit_or_rbrace = pos } };
            if (slice[pos] == '}')
                return .{ .failure = .{ .empty_unicode_escape_sequence = pos } };

            var codepoint: u32 = 0;
            while (pos < slice.len) : (pos += 1) {
                const c = slice[pos];
                if (c == '}') {
                    offset.* = pos + 1;
                    return .{ .success = @as(u21, @intCast(codepoint)) };
                }
                const digit = std.fmt.charToDigit(c, 16) catch
                    return .{ .failure = .{ .expected_hex_digit_or_rbrace = pos } };
                codepoint = codepoint * 16 + digit;
                // Checked after every digit, so the accumulator cannot overflow.
                if (codepoint > 0x10ffff)
                    return .{ .failure = .{ .invalid_unicode_codepoint = pos } };
            }
            // Ran out of input before seeing the closing brace.
            return .{ .failure = .{ .expected_rbrace = pos } };
        },
        else => return .{ .failure = .{ .invalid_escape_character = escape_index } },
    }
}
test parseCharLiteral {
    // Each case pairs a raw character literal with the exact result expected
    // from `parseCharLiteral`.
    const Case = struct {
        literal: []const u8,
        expected: ParsedCharLiteral,
    };
    const cases = [_]Case{
        // Successful parses.
        .{ .literal = "'a'", .expected = .{ .success = 'a' } },
        .{ .literal = "'ä'", .expected = .{ .success = 'ä' } },
        .{ .literal = "'\\x00'", .expected = .{ .success = 0 } },
        .{ .literal = "'\\x4f'", .expected = .{ .success = 0x4f } },
        .{ .literal = "'\\x4F'", .expected = .{ .success = 0x4f } },
        .{ .literal = "'ぁ'", .expected = .{ .success = 0x3041 } },
        .{ .literal = "'\\u{0}'", .expected = .{ .success = 0 } },
        .{ .literal = "'\\u{3041}'", .expected = .{ .success = 0x3041 } },
        .{ .literal = "'\\u{7f}'", .expected = .{ .success = 0x7f } },
        .{ .literal = "'\\u{7FFF}'", .expected = .{ .success = 0x7fff } },
        // Failures, with the index at which the problem is reported.
        .{ .literal = "'\\x0'", .expected = .{ .failure = .{ .expected_hex_digit = 4 } } },
        .{ .literal = "'\\x000'", .expected = .{ .failure = .{ .expected_single_quote = 5 } } },
        .{ .literal = "'\\y'", .expected = .{ .failure = .{ .invalid_escape_character = 2 } } },
        .{ .literal = "'\\u'", .expected = .{ .failure = .{ .expected_lbrace = 3 } } },
        .{ .literal = "'\\uFFFF'", .expected = .{ .failure = .{ .expected_lbrace = 3 } } },
        .{ .literal = "'\\u{}'", .expected = .{ .failure = .{ .empty_unicode_escape_sequence = 4 } } },
        .{ .literal = "'\\u{FFFFFF}'", .expected = .{ .failure = .{ .invalid_unicode_codepoint = 9 } } },
        .{ .literal = "'\\u{FFFF'", .expected = .{ .failure = .{ .expected_hex_digit_or_rbrace = 8 } } },
        .{ .literal = "'\\u{FFFF}x'", .expected = .{ .failure = .{ .expected_single_quote = 9 } } },
        .{ .literal = "'\x00'", .expected = .{ .failure = .{ .invalid_character = 1 } } },
    };

    for (cases) |case| {
        try std.testing.expectEqual(case.expected, parseCharLiteral(case.literal));
    }
}
/// Parses `bytes` as a Zig string literal and writes the decoded bytes to `writer`.
///
/// Asserts that `bytes` is at least two bytes long and has '"' at beginning and end.
/// Parsing stops with `.success` at the first unescaped '"' after the opening quote.
/// A line feed inside the literal, a malformed escape sequence, or a backslash
/// directly before the final quote yields `.failure`.
pub fn parseWrite(writer: anytype, bytes: []const u8) error{OutOfMemory}!Result {
    assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"');

    var index: usize = 1;
    while (true) {
        const b = bytes[index];
        switch (b) {
            '\\' => {
                const escape_char_index = index + 1;
                // A backslash directly before the final quote would consume the
                // terminator as `\"`, and the next iteration would read past the
                // end of `bytes`. Report it as a backslash with no escape character.
                if (escape_char_index == bytes.len - 1)
                    return Result{ .failure = .{ .invalid_escape_character = escape_char_index } };

                const result = parseEscapeSequence(bytes, &index);
                switch (result) {
                    .success => |codepoint| {
                        if (bytes[escape_char_index] == 'u') {
                            // Unicode escapes are emitted as UTF-8.
                            var buf: [4]u8 = undefined;
                            const len = utf8Encode(codepoint, &buf) catch {
                                return Result{ .failure = .{ .invalid_unicode_codepoint = escape_char_index + 1 } };
                            };
                            try writer.writeAll(buf[0..len]);
                        } else {
                            // Every other escape decodes to a single byte.
                            try writer.writeByte(@as(u8, @intCast(codepoint)));
                        }
                    },
                    .failure => |err| return Result{ .failure = err },
                }
            },
            '\n' => return Result{ .failure = .{ .invalid_character = index } },
            '"' => return Result.success,
            else => {
                try writer.writeByte(b);
                index += 1;
            },
        }
    }
}
/// Convenience wrapper around `parseWrite` that collects the decoded string in
/// memory obtained from `allocator`.
///
/// Any parse failure is reported as `error.InvalidLiteral` without further
/// detail. The caller owns the returned slice.
pub fn parseAlloc(allocator: std.mem.Allocator, bytes: []const u8) ParseError![]u8 {
    var list = std.ArrayList(u8).init(allocator);
    defer list.deinit();

    const outcome = try parseWrite(list.writer(), bytes);
    if (outcome == .failure) return error.InvalidLiteral;
    return list.toOwnedSlice();
}
test parseAlloc {
    // All allocations come out of a small fixed buffer.
    var backing: [512]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&backing);
    const allocator = fba.allocator();

    // A truncated hex escape is rejected.
    try std.testing.expectError(error.InvalidLiteral, parseAlloc(allocator, "\"\\x6\""));

    // Literals that parse, paired with the bytes they decode to.
    const cases = [_]struct { expected: []const u8, literal: []const u8 }{
        .{ .expected = "foo\nbar", .literal = "\"foo\\nbar\"" },
        .{ .expected = "\x12foo", .literal = "\"\\x12foo\"" },
        .{ .expected = "bytes\u{1234}foo", .literal = "\"bytes\\u{1234}foo\"" },
        .{ .expected = "foo", .literal = "\"foo\"" },
        .{ .expected = "foo", .literal = "\"f\x6f\x6f\"" },
        .{ .expected = "f💯", .literal = "\"f\u{1f4af}\"" },
    };
    for (cases) |case| {
        const parsed = try parseAlloc(allocator, case.literal);
        try std.testing.expect(std.mem.eql(u8, case.expected, parsed));
    }
}