Function next [src]

See std.json.Token for documentation of this function.

Prototype

pub fn next(self: *@This()) NextError!Token

Parameters

self: *@This()

Possible Errors

BufferUnderrun
OutOfMemory Error
SyntaxError Error
UnexpectedEndOfInput Error

Source

/// Advances the scanner's state machine until it can hand back one token.
///
/// The function loops over `self.state`. Every arm either returns (a token or
/// an error) or stores the follow-up state in `self.state` and jumps back to
/// `state_loop`. Because the state is kept in `self`, a call that stops in the
/// middle of a value can be continued by a later call.
///
/// Errors returned directly by this body:
/// - `error.SyntaxError`: the current byte is not acceptable in the current state.
/// - `error.UnexpectedEndOfInput`: the buffer ran out inside a string while
///   `self.is_end_of_input` is set.
/// - `error.BufferUnderrun`: the buffer ran out inside a string, more input may
///   follow, and there is no pending string slice to return.
/// Anything else in `NextError` is propagated from the helpers called with
/// `try` or returned by the `endOfBufferIn*` helpers (not shown here).
pub fn next(self: *@This()) NextError!Token {
    state_loop: while (true) {
        switch (self.state) {
            // Start of a value: the first significant byte selects the value kind.
            .value => {
                switch (try self.skipWhitespaceExpectByte()) {
                    // Object, Array
                    '{' => {
                        // Record the container kind so the closing bracket and
                        // commas can be checked against it in `.post_value`.
                        try self.stack.push(OBJECT_MODE);
                        self.cursor += 1;
                        self.state = .object_start;
                        return .object_begin;
                    },
                    '[' => {
                        try self.stack.push(ARRAY_MODE);
                        self.cursor += 1;
                        self.state = .array_start;
                        return .array_begin;
                    },
                    // String
                    '"' => {
                        // Step over the opening quote first so that the value
                        // starts at the first content byte.
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        continue :state_loop;
                    },
                    // Number
                    // `value_start` is recorded before advancing, so the first
                    // character (including a leading '-') is part of the value.
                    '1'...'9' => {
                        self.value_start = self.cursor;
                        self.cursor += 1;
                        self.state = .number_int;
                        continue :state_loop;
                    },
                    '0' => {
                        self.value_start = self.cursor;
                        self.cursor += 1;
                        self.state = .number_leading_zero;
                        continue :state_loop;
                    },
                    '-' => {
                        self.value_start = self.cursor;
                        self.cursor += 1;
                        self.state = .number_minus;
                        continue :state_loop;
                    },
                    // literal values
                    // Only the first letter is consumed here; the remaining
                    // letters are checked one per state in `.literal_*`.
                    't' => {
                        self.cursor += 1;
                        self.state = .literal_t;
                        continue :state_loop;
                    },
                    'f' => {
                        self.cursor += 1;
                        self.state = .literal_f;
                        continue :state_loop;
                    },
                    'n' => {
                        self.cursor += 1;
                        self.state = .literal_n;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // After a complete value (or an object key): expect ':', ',', a
            // closing bracket, or the end of the document.
            .post_value => {
                // NOTE(review): the unchecked `self.input[self.cursor]`, `stack.pop()`
                // and `stack.peek()` below rely on skipWhitespaceCheckEnd (not shown)
                // only returning false when a byte is available and a container is
                // open - confirm against its definition.
                if (try self.skipWhitespaceCheckEnd()) return .end_of_document;
                const c = self.input[self.cursor];
                if (self.string_is_object_key) {
                    // The string just finished was an object key, so the only
                    // acceptable byte is the ':' separator.
                    self.string_is_object_key = false;
                    switch (c) {
                        ':' => {
                            self.cursor += 1;
                            self.state = .value;
                            continue :state_loop;
                        },
                        else => return error.SyntaxError,
                    }
                }
                switch (c) {
                    '}' => {
                        // The popped container kind must match the bracket.
                        if (self.stack.pop() != OBJECT_MODE) return error.SyntaxError;
                        self.cursor += 1;
                        // stay in .post_value state.
                        return .object_end;
                    },
                    ']' => {
                        if (self.stack.pop() != ARRAY_MODE) return error.SyntaxError;
                        self.cursor += 1;
                        // stay in .post_value state.
                        return .array_end;
                    },
                    ',' => {
                        // What may follow a comma depends on the enclosing
                        // container: a key in an object, a value in an array.
                        switch (self.stack.peek()) {
                            OBJECT_MODE => {
                                self.state = .object_post_comma;
                            },
                            ARRAY_MODE => {
                                self.state = .value;
                            },
                        }
                        self.cursor += 1;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // Right after '{': either the first key or an immediate '}'.
            .object_start => {
                switch (try self.skipWhitespaceExpectByte()) {
                    '"' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        // Makes `.post_value` demand a ':' once this string ends.
                        self.string_is_object_key = true;
                        continue :state_loop;
                    },
                    '}' => {
                        self.cursor += 1;
                        // Discard the OBJECT_MODE entry pushed when '{' was seen.
                        _ = self.stack.pop();
                        self.state = .post_value;
                        return .object_end;
                    },
                    else => return error.SyntaxError,
                }
            },
            // After ',' inside an object: a key is mandatory, so '}' is rejected here.
            .object_post_comma => {
                switch (try self.skipWhitespaceExpectByte()) {
                    '"' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        self.string_is_object_key = true;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // Right after '[': either an immediate ']' or the first element.
            .array_start => {
                switch (try self.skipWhitespaceExpectByte()) {
                    ']' => {
                        self.cursor += 1;
                        _ = self.stack.pop();
                        self.state = .post_value;
                        return .array_end;
                    },
                    else => {
                        // Do not consume the byte; `.value` re-reads it.
                        self.state = .value;
                        continue :state_loop;
                    },
                }
            },
            // Number states. endOfBufferInNumber is not shown here; it is called with
            // `false` from states that still require another character ('-', '.',
            // 'e', exponent sign) and with `true` from states that have already
            // consumed at least one digit of the current part - presumably the flag
            // says whether the text so far may end the number (TODO confirm).
            .number_minus => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
                switch (self.input[self.cursor]) {
                    '0' => {
                        self.cursor += 1;
                        self.state = .number_leading_zero;
                        continue :state_loop;
                    },
                    '1'...'9' => {
                        self.cursor += 1;
                        self.state = .number_int;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // After a leading '0': further integer digits are not matched here, so
            // any byte other than '.', 'e' or 'E' ends the number.
            .number_leading_zero => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(true);
                switch (self.input[self.cursor]) {
                    '.' => {
                        self.cursor += 1;
                        self.state = .number_post_dot;
                        continue :state_loop;
                    },
                    'e', 'E' => {
                        self.cursor += 1;
                        self.state = .number_post_e;
                        continue :state_loop;
                    },
                    else => {
                        // The terminating byte is left unconsumed for `.post_value`.
                        self.state = .post_value;
                        return Token{ .number = self.takeValueSlice() };
                    },
                }
            },
            // Integer digits after a non-zero first digit.
            .number_int => {
                while (self.cursor < self.input.len) : (self.cursor += 1) {
                    switch (self.input[self.cursor]) {
                        // A plain `continue` runs the loop's `cursor += 1` step.
                        '0'...'9' => continue,
                        '.' => {
                            self.cursor += 1;
                            self.state = .number_post_dot;
                            continue :state_loop;
                        },
                        'e', 'E' => {
                            self.cursor += 1;
                            self.state = .number_post_e;
                            continue :state_loop;
                        },
                        else => {
                            self.state = .post_value;
                            return Token{ .number = self.takeValueSlice() };
                        },
                    }
                }
                return self.endOfBufferInNumber(true);
            },
            // Directly after '.': at least one fraction digit is required.
            .number_post_dot => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
                switch (self.input[self.cursor]) {
                    '0'...'9' => {
                        self.cursor += 1;
                        self.state = .number_frac;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // Remaining fraction digits.
            .number_frac => {
                while (self.cursor < self.input.len) : (self.cursor += 1) {
                    switch (self.input[self.cursor]) {
                        '0'...'9' => continue,
                        'e', 'E' => {
                            self.cursor += 1;
                            self.state = .number_post_e;
                            continue :state_loop;
                        },
                        else => {
                            self.state = .post_value;
                            return Token{ .number = self.takeValueSlice() };
                        },
                    }
                }
                return self.endOfBufferInNumber(true);
            },
            // Directly after 'e'/'E': an optional sign or the first exponent digit.
            .number_post_e => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
                switch (self.input[self.cursor]) {
                    '0'...'9' => {
                        self.cursor += 1;
                        self.state = .number_exp;
                        continue :state_loop;
                    },
                    '+', '-' => {
                        self.cursor += 1;
                        self.state = .number_post_e_sign;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // After the exponent sign: a digit is required.
            .number_post_e_sign => {
                if (self.cursor >= self.input.len) return self.endOfBufferInNumber(false);
                switch (self.input[self.cursor]) {
                    '0'...'9' => {
                        self.cursor += 1;
                        self.state = .number_exp;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // Remaining exponent digits.
            .number_exp => {
                while (self.cursor < self.input.len) : (self.cursor += 1) {
                    switch (self.input[self.cursor]) {
                        '0'...'9' => continue,
                        else => {
                            self.state = .post_value;
                            return Token{ .number = self.takeValueSlice() };
                        },
                    }
                }
                return self.endOfBufferInNumber(true);
            },
            // Inside a string, between escapes and multi-byte sequences. Plain ASCII
            // bytes are skipped in the inner loop; everything else leaves the loop.
            .string => {
                while (self.cursor < self.input.len) : (self.cursor += 1) {
                    switch (self.input[self.cursor]) {
                        0...0x1f => return error.SyntaxError, // Bare ASCII control code in string.
                        // ASCII plain text.
                        0x20...('"' - 1), ('"' + 1)...('\\' - 1), ('\\' + 1)...0x7F => continue,
                        // Special characters.
                        '"' => {
                            // The slice is taken before stepping over the closing
                            // quote. takeValueSlice is not shown; presumably it
                            // yields the bytes from `value_start` up to `cursor`.
                            const result = Token{ .string = self.takeValueSlice() };
                            self.cursor += 1;
                            self.state = .post_value;
                            return result;
                        },
                        '\\' => {
                            // Hand back any text collected before the backslash as
                            // its own token; the escape is decoded on a later pass.
                            const slice = self.takeValueSlice();
                            self.cursor += 1;
                            self.state = .string_backslash;
                            if (slice.len > 0) return Token{ .partial_string = slice };
                            continue :state_loop;
                        },
                        // UTF-8 validation.
                        // See http://unicode.org/mail-arch/unicode-ml/y2003-m02/att-0467/01-The_Algorithm_to_Valide_an_UTF-8_String
                        // Each lead byte selects a state that checks the bytes that
                        // must follow it. `value_start` is left untouched, so these
                        // bytes stay in the string slice being collected.
                        0xC2...0xDF => {
                            self.cursor += 1;
                            self.state = .string_utf8_last_byte;
                            continue :state_loop;
                        },
                        0xE0 => {
                            self.cursor += 1;
                            self.state = .string_utf8_second_to_last_byte_guard_against_overlong;
                            continue :state_loop;
                        },
                        0xE1...0xEC, 0xEE...0xEF => {
                            self.cursor += 1;
                            self.state = .string_utf8_second_to_last_byte;
                            continue :state_loop;
                        },
                        0xED => {
                            self.cursor += 1;
                            self.state = .string_utf8_second_to_last_byte_guard_against_surrogate_half;
                            continue :state_loop;
                        },
                        0xF0 => {
                            self.cursor += 1;
                            self.state = .string_utf8_third_to_last_byte_guard_against_overlong;
                            continue :state_loop;
                        },
                        0xF1...0xF3 => {
                            self.cursor += 1;
                            self.state = .string_utf8_third_to_last_byte;
                            continue :state_loop;
                        },
                        0xF4 => {
                            self.cursor += 1;
                            self.state = .string_utf8_third_to_last_byte_guard_against_too_large;
                            continue :state_loop;
                        },
                        0x80...0xC1, 0xF5...0xFF => return error.SyntaxError, // Invalid UTF-8.
                    }
                }
                // The buffer ended inside the string. If no more input will come the
                // string is unterminated; otherwise return what has been collected
                // so far, or report an underrun when nothing is pending.
                if (self.is_end_of_input) return error.UnexpectedEndOfInput;
                const slice = self.takeValueSlice();
                if (slice.len > 0) return Token{ .partial_string = slice };
                return error.BufferUnderrun;
            },
            // Directly after a backslash inside a string.
            .string_backslash => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    '"', '\\', '/' => {
                        // Since these characters now represent themselves literally,
                        // we can simply begin the next plaintext slice here.
                        self.value_start = self.cursor;
                        self.cursor += 1;
                        self.state = .string;
                        continue :state_loop;
                    },
                    // For the single-byte escapes below, the next slice starts after
                    // the escape letter and the decoded byte is returned as its own
                    // one-byte token.
                    'b' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{0x08} };
                    },
                    'f' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{0x0c} };
                    },
                    'n' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{'\n'} };
                    },
                    'r' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{'\r'} };
                    },
                    't' => {
                        self.cursor += 1;
                        self.value_start = self.cursor;
                        self.state = .string;
                        return Token{ .partial_string_escaped_1 = [_]u8{'\t'} };
                    },
                    'u' => {
                        self.cursor += 1;
                        self.state = .string_backslash_u;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // The four hex digits of a \uXXXX escape, one state per digit. Each digit
            // is placed into `utf16_code_units[0]` at shift 12, 8, 4 and 0.
            .string_backslash_u => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    // The first digit assigns (rather than ORs), which clears
                    // whatever an earlier escape left in the code unit.
                    '0'...'9' => {
                        self.utf16_code_units[0] = @as(u16, c - '0') << 12;
                    },
                    'A'...'F' => {
                        self.utf16_code_units[0] = @as(u16, c - 'A' + 10) << 12;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[0] = @as(u16, c - 'a' + 10) << 12;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                self.state = .string_backslash_u_1;
                continue :state_loop;
            },
            .string_backslash_u_1 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[0] |= @as(u16, c - '0') << 8;
                    },
                    'A'...'F' => {
                        self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 8;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 8;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                self.state = .string_backslash_u_2;
                continue :state_loop;
            },
            .string_backslash_u_2 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[0] |= @as(u16, c - '0') << 4;
                    },
                    'A'...'F' => {
                        self.utf16_code_units[0] |= @as(u16, c - 'A' + 10) << 4;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[0] |= @as(u16, c - 'a' + 10) << 4;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                self.state = .string_backslash_u_3;
                continue :state_loop;
            },
            // Last hex digit: the code unit is complete, so decide how to emit it.
            .string_backslash_u_3 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[0] |= c - '0';
                    },
                    'A'...'F' => {
                        self.utf16_code_units[0] |= c - 'A' + 10;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[0] |= c - 'a' + 10;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                if (std.unicode.utf16IsHighSurrogate(self.utf16_code_units[0])) {
                    // A high surrogate must be followed by a second \uXXXX escape
                    // carrying the low half; nothing is emitted until then.
                    self.state = .string_surrogate_half;
                    continue :state_loop;
                } else if (std.unicode.utf16IsLowSurrogate(self.utf16_code_units[0])) {
                    return error.SyntaxError; // Unexpected low surrogate half.
                } else {
                    // A non-surrogate code unit stands on its own.
                    self.value_start = self.cursor;
                    self.state = .string;
                    return partialStringCodepoint(self.utf16_code_units[0]);
                }
            },
            // After a high surrogate escape: the next bytes must spell `\u` followed
            // by `D`, then a digit in C..F, then two hex digits. Each state below
            // checks one of those bytes.
            .string_surrogate_half => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    '\\' => {
                        self.cursor += 1;
                        self.state = .string_surrogate_half_backslash;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Expected low surrogate half.
                }
            },
            .string_surrogate_half_backslash => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    'u' => {
                        self.cursor += 1;
                        self.state = .string_surrogate_half_backslash_u;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Expected low surrogate half.
                }
            },
            .string_surrogate_half_backslash_u => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    'D', 'd' => {
                        self.cursor += 1;
                        // The top hex digit is fixed, so it is stored as a constant;
                        // the assignment also clears any stale value in the unit.
                        self.utf16_code_units[1] = 0xD << 12;
                        self.state = .string_surrogate_half_backslash_u_1;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Expected low surrogate half.
                }
            },
            .string_surrogate_half_backslash_u_1 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    // Only C..F is accepted as the second digit, which restricts the
                    // second code unit to the range 0xDC00..0xDFFF.
                    'C'...'F' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 8;
                        self.state = .string_surrogate_half_backslash_u_2;
                        continue :state_loop;
                    },
                    'c'...'f' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 8;
                        self.state = .string_surrogate_half_backslash_u_2;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Expected low surrogate half.
                }
            },
            .string_surrogate_half_backslash_u_2 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - '0') << 4;
                        self.state = .string_surrogate_half_backslash_u_3;
                        continue :state_loop;
                    },
                    'A'...'F' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - 'A' + 10) << 4;
                        self.state = .string_surrogate_half_backslash_u_3;
                        continue :state_loop;
                    },
                    'a'...'f' => {
                        self.cursor += 1;
                        self.utf16_code_units[1] |= @as(u16, c - 'a' + 10) << 4;
                        self.state = .string_surrogate_half_backslash_u_3;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            // Last digit of the low surrogate: combine both halves and emit.
            .string_surrogate_half_backslash_u_3 => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                const c = self.input[self.cursor];
                switch (c) {
                    '0'...'9' => {
                        self.utf16_code_units[1] |= c - '0';
                    },
                    'A'...'F' => {
                        self.utf16_code_units[1] |= c - 'A' + 10;
                    },
                    'a'...'f' => {
                        self.utf16_code_units[1] |= c - 'a' + 10;
                    },
                    else => return error.SyntaxError,
                }
                self.cursor += 1;
                self.value_start = self.cursor;
                self.state = .string;
                // The first unit was accepted by utf16IsHighSurrogate and the second
                // was constrained to 0xDC00..0xDFFF by the states above, so the
                // decode is treated as unable to fail.
                const code_point = std.unicode.utf16DecodeSurrogatePair(&self.utf16_code_units) catch unreachable;
                return partialStringCodepoint(code_point);
            },
            // UTF-8 continuation-byte states. Each accepts one byte from the range
            // shown and moves one step closer to `.string`. The `guard_against_*`
            // variants accept a narrower range for the byte that follows the lead
            // bytes 0xE0, 0xED, 0xF0 and 0xF4.
            .string_utf8_last_byte => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0xBF => {
                        self.cursor += 1;
                        self.state = .string;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            .string_utf8_second_to_last_byte => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0xBF => {
                        self.cursor += 1;
                        self.state = .string_utf8_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            // Entered after lead byte 0xE0: only 0xA0..0xBF is accepted.
            .string_utf8_second_to_last_byte_guard_against_overlong => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0xA0...0xBF => {
                        self.cursor += 1;
                        self.state = .string_utf8_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            // Entered after lead byte 0xED: only 0x80..0x9F is accepted.
            .string_utf8_second_to_last_byte_guard_against_surrogate_half => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0x9F => {
                        self.cursor += 1;
                        self.state = .string_utf8_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            .string_utf8_third_to_last_byte => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0xBF => {
                        self.cursor += 1;
                        self.state = .string_utf8_second_to_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            // Entered after lead byte 0xF0: only 0x90..0xBF is accepted.
            .string_utf8_third_to_last_byte_guard_against_overlong => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x90...0xBF => {
                        self.cursor += 1;
                        self.state = .string_utf8_second_to_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            // Entered after lead byte 0xF4: only 0x80..0x8F is accepted.
            .string_utf8_third_to_last_byte_guard_against_too_large => {
                if (self.cursor >= self.input.len) return self.endOfBufferInString();
                switch (self.input[self.cursor]) {
                    0x80...0x8F => {
                        self.cursor += 1;
                        self.state = .string_utf8_second_to_last_byte;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError, // Invalid UTF-8.
                }
            },
            // Literal states: one expected letter per state. The token is returned
            // when the final letter matches. expectByte is not shown here; its
            // errors are propagated by `try`.
            .literal_t => {
                switch (try self.expectByte()) {
                    'r' => {
                        self.cursor += 1;
                        self.state = .literal_tr;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_tr => {
                switch (try self.expectByte()) {
                    'u' => {
                        self.cursor += 1;
                        self.state = .literal_tru;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_tru => {
                switch (try self.expectByte()) {
                    'e' => {
                        self.cursor += 1;
                        self.state = .post_value;
                        return .true;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_f => {
                switch (try self.expectByte()) {
                    'a' => {
                        self.cursor += 1;
                        self.state = .literal_fa;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_fa => {
                switch (try self.expectByte()) {
                    'l' => {
                        self.cursor += 1;
                        self.state = .literal_fal;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_fal => {
                switch (try self.expectByte()) {
                    's' => {
                        self.cursor += 1;
                        self.state = .literal_fals;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_fals => {
                switch (try self.expectByte()) {
                    'e' => {
                        self.cursor += 1;
                        self.state = .post_value;
                        return .false;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_n => {
                switch (try self.expectByte()) {
                    'u' => {
                        self.cursor += 1;
                        self.state = .literal_nu;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_nu => {
                switch (try self.expectByte()) {
                    'l' => {
                        self.cursor += 1;
                        self.state = .literal_nul;
                        continue :state_loop;
                    },
                    else => return error.SyntaxError,
                }
            },
            .literal_nul => {
                switch (try self.expectByte()) {
                    'l' => {
                        self.cursor += 1;
                        self.state = .post_value;
                        return .null;
                    },
                    else => return error.SyntaxError,
                }
            },
        }
        // Every arm above ends in `return` or `continue :state_loop`, so control
        // never falls out of the switch.
        unreachable;
    }
}