struct DepTokenizer [src]
Alias for std.Build.Cache.DepTokenizer
Fields
index: usize = 0
bytes: []const u8
state: State = .lhs
Members
Source
const Tokenizer = @This();
/// Offset into `bytes` of the next byte `next` will examine.
index: usize = 0,
/// The input text being tokenized.
bytes: []const u8,
/// Scanner state; it persists between calls to `next`.
state: State = .lhs,
const std = @import("std");
const testing = std.testing;
const assert = std.debug.assert;
/// Returns the next token from `bytes`, resuming at `index` in the current `state`.
///
/// Returns `null` once the end of input is reached with nothing pending.
/// Malformed input is reported in-band as one of the error variants of `Token`
/// rather than as a Zig error.
pub fn next(self: *Tokenizer) ?Token {
// Offset where the token currently being scanned begins.
var start = self.index;
// Set when an escape is seen that `Token.resolve` has to rewrite later.
var must_resolve = false;
while (self.index < self.bytes.len) {
const char = self.bytes[self.index];
switch (self.state) {
// Before a target: skip whitespace, anything else begins a target.
.lhs => switch (char) {
'\t', '\n', '\r', ' ' => {
// silently ignore whitespace
self.index += 1;
},
else => {
// Not consumed here; the same char is re-examined in `.target`.
start = self.index;
self.state = .target;
},
},
// Inside a target name.
.target => switch (char) {
'\n', '\r' => {
return errorIllegalChar(.invalid_target, self.index, char);
},
'$' => {
self.state = .target_dollar_sign;
self.index += 1;
},
'\\' => {
self.state = .target_reverse_solidus;
self.index += 1;
},
':' => {
self.state = .target_colon;
self.index += 1;
},
'\t', ' ' => {
// Horizontal whitespace ends the target; a ':' must follow.
self.state = .target_space;
const bytes = self.bytes[start..self.index];
std.debug.assert(bytes.len != 0);
self.index += 1;
return finishTarget(must_resolve, bytes);
},
else => {
self.index += 1;
},
},
// The byte right after a '\' inside a target.
.target_reverse_solidus => switch (char) {
'\t', '\n', '\r' => {
return errorIllegalChar(.bad_target_escape, self.index, char);
},
' ', '#', '\\' => {
// A real escape sequence: the target needs resolving.
must_resolve = true;
self.state = .target;
self.index += 1;
},
'$' => {
self.state = .target_dollar_sign;
self.index += 1;
},
else => {
// The backslash is kept as an ordinary target byte.
self.state = .target;
self.index += 1;
},
},
// The byte right after a '$' inside a target: only "$$" is accepted.
.target_dollar_sign => switch (char) {
'$' => {
must_resolve = true;
self.state = .target;
self.index += 1;
},
else => {
return errorIllegalChar(.expected_dollar_sign, self.index, char);
},
},
// The byte right after a ':' in a target; `index - 1` excludes that colon.
.target_colon => switch (char) {
'\n', '\r' => {
// The line ending is left for `.lhs` to skip.
const bytes = self.bytes[start .. self.index - 1];
if (bytes.len != 0) {
self.state = .lhs;
return finishTarget(must_resolve, bytes);
}
// silently ignore null target
self.state = .lhs;
},
'/', '\\' => {
// The colon may belong to the target itself (e.g. `C:\...`, `C:/...`).
self.state = .target_colon_reverse_solidus;
self.index += 1;
},
else => {
const bytes = self.bytes[start .. self.index - 1];
if (bytes.len != 0) {
self.state = .rhs;
return finishTarget(must_resolve, bytes);
}
// silently ignore null target
self.state = .lhs;
},
},
// The byte right after ":/" or ":\".
.target_colon_reverse_solidus => switch (char) {
'\n', '\r' => {
// The colon ended the target after all; `index - 2` drops the colon and the slash.
const bytes = self.bytes[start .. self.index - 2];
if (bytes.len != 0) {
self.state = .lhs;
return finishTarget(must_resolve, bytes);
}
// silently ignore null target
self.state = .lhs;
},
else => {
// Colon and slash stay part of the target; re-examine this char in `.target`.
self.state = .target;
},
},
// A target was already returned because whitespace followed it; expect ':'.
.target_space => switch (char) {
'\t', ' ' => {
// silently ignore additional horizontal whitespace
self.index += 1;
},
':' => {
self.state = .rhs;
self.index += 1;
},
else => {
return errorIllegalChar(.expected_colon, self.index, char);
},
},
// Between prerequisites on the right-hand side of a rule.
.rhs => switch (char) {
'\t', ' ' => {
// silently ignore horizontal whitespace
self.index += 1;
},
'\n', '\r' => {
// End of the rule; the line ending itself is skipped by `.lhs`.
self.state = .lhs;
},
'\\' => {
self.state = .rhs_continuation;
self.index += 1;
},
'"' => {
// Quoted prerequisite: its bytes start after the opening quote.
self.state = .prereq_quote;
self.index += 1;
start = self.index;
},
else => {
start = self.index;
self.state = .prereq;
},
},
// After a '\' between prerequisites: only "\n" or "\r\n" may follow.
.rhs_continuation => switch (char) {
'\n' => {
self.state = .rhs;
self.index += 1;
},
'\r' => {
self.state = .rhs_continuation_linefeed;
self.index += 1;
},
else => {
return errorIllegalChar(.continuation_eol, self.index, char);
},
},
// After "\\\r" between prerequisites: '\n' must follow.
.rhs_continuation_linefeed => switch (char) {
'\n' => {
self.state = .rhs;
self.index += 1;
},
else => {
return errorIllegalChar(.continuation_eol, self.index, char);
},
},
// Inside a double-quoted prerequisite; everything up to the closing quote is taken verbatim.
.prereq_quote => switch (char) {
'"' => {
self.index += 1;
self.state = .rhs;
// `index - 1` excludes the closing quote that was just consumed.
return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
},
else => {
self.index += 1;
},
},
// Inside an unquoted prerequisite.
.prereq => switch (char) {
'\t', ' ' => {
self.state = .rhs;
return finishPrereq(must_resolve, self.bytes[start..self.index]);
},
'\n', '\r' => {
self.state = .lhs;
return finishPrereq(must_resolve, self.bytes[start..self.index]);
},
'\\' => {
self.state = .prereq_continuation;
self.index += 1;
},
else => {
self.index += 1;
},
},
// The byte right after a '\' inside an unquoted prerequisite.
.prereq_continuation => switch (char) {
'\n' => {
self.index += 1;
self.state = .rhs;
// `index - 2` drops the backslash and the newline just consumed.
return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
},
'\r' => {
self.state = .prereq_continuation_linefeed;
self.index += 1;
},
'\\' => {
// The previous \ wasn't a continuation, but this one might be.
self.index += 1;
},
' ' => {
// not continuation, but escaped space must be resolved
must_resolve = true;
self.state = .prereq;
self.index += 1;
},
else => {
// not continuation
self.state = .prereq;
self.index += 1;
},
},
// After "\\\r" inside an unquoted prerequisite: '\n' must follow.
.prereq_continuation_linefeed => switch (char) {
'\n' => {
self.index += 1;
self.state = .rhs;
// `index - 3` drops the backslash, '\r' and '\n'.
return finishPrereq(must_resolve, self.bytes[start .. self.index - 3]);
},
else => {
return errorIllegalChar(.continuation_eol, self.index, char);
},
},
}
} else {
// End of input: finish or reject whatever the current state left pending.
switch (self.state) {
.lhs,
.rhs,
.rhs_continuation,
.rhs_continuation_linefeed,
=> return null,
.target => {
// A target that was never followed by ':' or whitespace.
return errorPosition(.incomplete_target, start, self.bytes[start..]);
},
.target_reverse_solidus,
.target_dollar_sign,
=> {
// Report the dangling '\' or '$', which is the last byte of the input.
const idx = self.index - 1;
return errorIllegalChar(.incomplete_escape, idx, self.bytes[idx]);
},
.target_colon => {
const bytes = self.bytes[start .. self.index - 1];
if (bytes.len != 0) {
self.index += 1;
self.state = .rhs;
return finishTarget(must_resolve, bytes);
}
// silently ignore null target
self.state = .lhs;
return null;
},
.target_colon_reverse_solidus => {
const bytes = self.bytes[start .. self.index - 2];
if (bytes.len != 0) {
self.index += 1;
self.state = .rhs;
return finishTarget(must_resolve, bytes);
}
// silently ignore null target
self.state = .lhs;
return null;
},
.target_space => {
const idx = self.index - 1;
return errorIllegalChar(.expected_colon, idx, self.bytes[idx]);
},
.prereq_quote => {
// The closing quote never appeared.
return errorPosition(.incomplete_quoted_prerequisite, start, self.bytes[start..]);
},
.prereq => {
self.state = .lhs;
return finishPrereq(must_resolve, self.bytes[start..]);
},
.prereq_continuation => {
// Drop the trailing backslash.
self.state = .lhs;
return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
},
.prereq_continuation_linefeed => {
// Drop the trailing backslash and '\r'.
self.state = .lhs;
return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
},
}
}
unreachable;
}
/// Builds the error token `id` whose payload records where the offending
/// bytes start (`index`) and the bytes themselves.
fn errorPosition(comptime id: std.meta.Tag(Token), index: usize, bytes: []const u8) Token {
    const payload: Token.IndexAndBytes = .{ .index = index, .bytes = bytes };
    return @unionInit(Token, @tagName(id), payload);
}
/// Builds the error token `id` whose payload records the offending byte
/// (`char`) and its position (`index`).
fn errorIllegalChar(comptime id: std.meta.Tag(Token), index: usize, char: u8) Token {
    const payload: Token.IndexAndChar = .{ .index = index, .char = char };
    return @unionInit(Token, @tagName(id), payload);
}
/// Wraps `bytes` as a target token, choosing the `_must_resolve` variant when
/// escapes were seen while scanning.
fn finishTarget(must_resolve: bool, bytes: []const u8) Token {
    if (must_resolve) {
        return Token{ .target_must_resolve = bytes };
    }
    return Token{ .target = bytes };
}
/// Wraps `bytes` as a prerequisite token, choosing the `_must_resolve` variant
/// when escapes were seen while scanning.
fn finishPrereq(must_resolve: bool, bytes: []const u8) Token {
    if (must_resolve) {
        return Token{ .prereq_must_resolve = bytes };
    }
    return Token{ .prereq = bytes };
}
/// States of the scanner driven by `next`.
const State = enum {
/// Before a target; whitespace is skipped.
lhs,
/// Inside a target.
target,
/// Just after a '\' inside a target.
target_reverse_solidus,
/// Just after a '$' inside a target; another '$' must follow.
target_dollar_sign,
/// Just after a ':' in a target.
target_colon,
/// Just after ":/" or ":\" in a target.
target_colon_reverse_solidus,
/// After a target that was ended by horizontal whitespace; ':' is expected.
target_space,
/// Between prerequisites.
rhs,
/// Just after a '\' between prerequisites; a line ending must follow.
rhs_continuation,
/// Just after "\\\r" between prerequisites; '\n' must follow.
rhs_continuation_linefeed,
/// Inside a double-quoted prerequisite.
prereq_quote,
/// Inside an unquoted prerequisite.
prereq,
/// Just after a '\' inside an unquoted prerequisite.
prereq_continuation,
/// Just after "\\\r" inside an unquoted prerequisite; '\n' must follow.
prereq_continuation_linefeed,
};
/// A token produced by `next`: a target, a prerequisite, or an in-band error.
pub const Token = union(enum) {
    /// Target bytes that can be used as they are.
    target: []const u8,
    /// Target bytes that still contain escapes; pass the token to `resolve`.
    target_must_resolve: []const u8,
    /// Prerequisite bytes that can be used as they are.
    prereq: []const u8,
    /// Prerequisite bytes that still contain escapes; pass the token to `resolve`.
    prereq_must_resolve: []const u8,

    // Error variants. Use `printError` to render them.
    incomplete_quoted_prerequisite: IndexAndBytes,
    incomplete_target: IndexAndBytes,
    invalid_target: IndexAndChar,
    bad_target_escape: IndexAndChar,
    expected_dollar_sign: IndexAndChar,
    continuation_eol: IndexAndChar,
    incomplete_escape: IndexAndChar,
    expected_colon: IndexAndChar,

    /// Error payload: the offending byte and its offset in the input.
    pub const IndexAndChar = struct {
        index: usize,
        char: u8,
    };

    /// Error payload: the offending bytes and the offset where they start.
    pub const IndexAndBytes = struct {
        index: usize,
        bytes: []const u8,
    };

    /// Resolve escapes in target or prereq. Only valid with .target_must_resolve or .prereq_must_resolve.
    ///
    /// The resolved bytes are written to `writer`; any other variant is `unreachable`.
    pub fn resolve(self: Token, writer: anytype) @TypeOf(writer).Error!void {
        switch (self) {
            .target_must_resolve => |bytes| {
                var state: enum { start, escape, dollar } = .start;
                for (bytes) |c| {
                    switch (state) {
                        .start => {
                            switch (c) {
                                '\\' => state = .escape,
                                '$' => state = .dollar,
                                else => try writer.writeByte(c),
                            }
                        },
                        .escape => {
                            switch (c) {
                                // `\ `, `\#` and `\\` drop the escaping backslash.
                                ' ', '#', '\\' => {},
                                '$' => {
                                    // The backslash was literal; the '$' starts its own escape.
                                    try writer.writeByte('\\');
                                    state = .dollar;
                                    continue;
                                },
                                // Any other byte: the backslash was literal.
                                else => try writer.writeByte('\\'),
                            }
                            try writer.writeByte(c);
                            state = .start;
                        },
                        .dollar => {
                            // "$$" collapses to a single '$'.
                            try writer.writeByte('$');
                            switch (c) {
                                '$' => {},
                                else => try writer.writeByte(c),
                            }
                            state = .start;
                        },
                    }
                }
            },
            .prereq_must_resolve => |bytes| {
                var state: enum { start, escape } = .start;
                for (bytes) |c| {
                    switch (state) {
                        .start => {
                            switch (c) {
                                '\\' => state = .escape,
                                else => try writer.writeByte(c),
                            }
                        },
                        .escape => {
                            switch (c) {
                                // `\ ` drops the escaping backslash.
                                ' ' => {},
                                '\\' => {
                                    // The earlier backslash was literal; this one may still escape.
                                    try writer.writeByte(c);
                                    continue;
                                },
                                // Any other byte: the backslash was literal.
                                else => try writer.writeByte('\\'),
                            }
                            try writer.writeByte(c);
                            state = .start;
                        },
                    }
                }
                // A backslash still pending at the end escapes nothing, so it is
                // literal and must not be dropped (e.g. bytes `a\ b\`, which the
                // tokenizer yields for `a\ b\\` followed by a line ending).
                if (state == .escape) try writer.writeByte('\\');
            },
            else => unreachable,
        }
    }

    /// Writes a human-readable description of an error token to `writer`.
    /// Must not be called on the four non-error variants.
    pub fn printError(self: Token, writer: anytype) @TypeOf(writer).Error!void {
        switch (self) {
            .target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
            .incomplete_quoted_prerequisite,
            .incomplete_target,
            => |index_and_bytes| {
                try writer.print("{s} '", .{self.errStr()});
                if (self == .incomplete_target) {
                    // Show the target with its escapes resolved.
                    const tmp = Token{ .target_must_resolve = index_and_bytes.bytes };
                    try tmp.resolve(writer);
                } else {
                    try printCharValues(writer, index_and_bytes.bytes);
                }
                try writer.print("' at position {d}", .{index_and_bytes.index});
            },
            .invalid_target,
            .bad_target_escape,
            .expected_dollar_sign,
            .continuation_eol,
            .incomplete_escape,
            .expected_colon,
            => |index_and_char| {
                try writer.writeAll("illegal char ");
                try printUnderstandableChar(writer, index_and_char.char);
                try writer.print(" at position {d}: {s}", .{ index_and_char.index, self.errStr() });
            },
        }
    }

    /// Short fixed description for each error variant.
    fn errStr(self: Token) []const u8 {
        return switch (self) {
            .target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
            .incomplete_quoted_prerequisite => "incomplete quoted prerequisite",
            .incomplete_target => "incomplete target",
            .invalid_target => "invalid target",
            .bad_target_escape => "bad target escape",
            .expected_dollar_sign => "expecting '$'",
            .continuation_eol => "continuation expecting end-of-line",
            .incomplete_escape => "incomplete escape",
            .expected_colon => "expecting ':'",
        };
    }
};
// Empty input produces no tokens.
test "empty file" {
try depTokenizer("", "");
}
// Input consisting only of whitespace produces no tokens.
test "empty whitespace" {
try depTokenizer("\n", "");
try depTokenizer("\r", "");
try depTokenizer("\r\n", "");
try depTokenizer(" ", "");
}
// A colon with no target name in front of it is silently ignored.
test "empty colon" {
try depTokenizer(":", "");
try depTokenizer("\n:", "");
try depTokenizer("\r:", "");
try depTokenizer("\r\n:", "");
try depTokenizer(" :", "");
}
// Targets without prerequisites yield one target token each.
test "empty target" {
try depTokenizer("foo.o:", "target = {foo.o}");
try depTokenizer(
\\foo.o:
\\bar.o:
\\abcd.o:
,
\\target = {foo.o}
\\target = {bar.o}
\\target = {abcd.o}
);
}
// Whitespace before a target is skipped.
test "whitespace empty target" {
try depTokenizer("\nfoo.o:", "target = {foo.o}");
try depTokenizer("\rfoo.o:", "target = {foo.o}");
try depTokenizer("\r\nfoo.o:", "target = {foo.o}");
try depTokenizer(" foo.o:", "target = {foo.o}");
}
// Escapes at the start of a target (`\ `, `\#`, `\\`, `$$`) are resolved.
test "escape empty target" {
try depTokenizer("\\ foo.o:", "target = { foo.o}");
try depTokenizer("\\#foo.o:", "target = {#foo.o}");
try depTokenizer("\\\\foo.o:", "target = {\\foo.o}");
try depTokenizer("$$foo.o:", "target = {$foo.o}");
}
// A target followed by optional line endings yields just the target.
test "empty target linefeeds" {
try depTokenizer("\n", "");
try depTokenizer("\r\n", "");
const expect = "target = {foo.o}";
try depTokenizer(
\\foo.o:
, expect);
try depTokenizer(
\\foo.o:
\\
, expect);
try depTokenizer(
\\foo.o:
, expect);
try depTokenizer(
\\foo.o:
\\
, expect);
}
// A backslash directly after the colon, with or without a line ending, still yields just the target.
test "empty target linefeeds + continuations" {
const expect = "target = {foo.o}";
try depTokenizer(
\\foo.o:\
, expect);
try depTokenizer(
\\foo.o:\
\\
, expect);
try depTokenizer(
\\foo.o:\
, expect);
try depTokenizer(
\\foo.o:\
\\
, expect);
}
// Same as above, with horizontal whitespace between the colon and the continuation backslash.
test "empty target linefeeds + hspace + continuations" {
const expect = "target = {foo.o}";
try depTokenizer(
\\foo.o: \
, expect);
try depTokenizer(
\\foo.o: \
\\
, expect);
try depTokenizer(
\\foo.o: \
, expect);
try depTokenizer(
\\foo.o: \
\\
, expect);
}
// Horizontal whitespace between the target and its colon is allowed.
test "empty target + hspace + colon" {
const expect = "target = {foo.o}";
try depTokenizer("foo.o :", expect);
try depTokenizer("foo.o\t\t\t:", expect);
try depTokenizer("foo.o \t \t :", expect);
try depTokenizer("\r\nfoo.o :", expect);
try depTokenizer(" foo.o :", expect);
}
// One target with one prerequisite, on the same line or after a continuation.
test "prereq" {
const expect =
\\target = {foo.o}
\\prereq = {foo.c}
;
try depTokenizer("foo.o: foo.c", expect);
try depTokenizer(
\\foo.o: \
\\foo.c
, expect);
try depTokenizer(
\\foo.o: \
\\ foo.c
, expect);
try depTokenizer(
\\foo.o: \
\\ foo.c
, expect);
}
// A continuation backslash directly after a prerequisite ends that prerequisite.
test "prereq continuation" {
const expect =
\\target = {foo.o}
\\prereq = {foo.h}
\\prereq = {bar.h}
;
try depTokenizer(
\\foo.o: foo.h\
\\bar.h
, expect);
try depTokenizer(
\\foo.o: foo.h\
\\bar.h
, expect);
}
// Same as "prereq continuation", with a "\r\n" line ending after the backslash.
test "prereq continuation (CRLF)" {
const expect =
\\target = {foo.o}
\\prereq = {foo.h}
\\prereq = {bar.h}
;
try depTokenizer("foo.o: foo.h\\\r\nbar.h", expect);
}
// Several prerequisites for one target, spread over lines with various continuation placements.
test "multiple prereqs" {
const expect =
\\target = {foo.o}
\\prereq = {foo.c}
\\prereq = {foo.h}
\\prereq = {bar.h}
;
try depTokenizer("foo.o: foo.c foo.h bar.h", expect);
try depTokenizer(
\\foo.o: \
\\foo.c foo.h bar.h
, expect);
try depTokenizer(
\\foo.o: foo.c foo.h bar.h\
, expect);
try depTokenizer(
\\foo.o: foo.c foo.h bar.h\
\\
, expect);
try depTokenizer(
\\foo.o: \
\\foo.c \
\\ foo.h\
\\bar.h
\\
, expect);
try depTokenizer(
\\foo.o: \
\\foo.c \
\\ foo.h\
\\bar.h\
\\
, expect);
try depTokenizer(
\\foo.o: \
\\foo.c \
\\ foo.h\
\\bar.h\
, expect);
}
// Several rules in one input, including escaped targets and quoted prerequisites
// (quoted prerequisites are taken verbatim, e.g. `$$` stays `$$`).
test "multiple targets and prereqs" {
try depTokenizer(
\\foo.o: foo.c
\\bar.o: bar.c a.h b.h c.h
\\abc.o: abc.c \
\\ one.h two.h \
\\ three.h four.h
,
\\target = {foo.o}
\\prereq = {foo.c}
\\target = {bar.o}
\\prereq = {bar.c}
\\prereq = {a.h}
\\prereq = {b.h}
\\prereq = {c.h}
\\target = {abc.o}
\\prereq = {abc.c}
\\prereq = {one.h}
\\prereq = {two.h}
\\prereq = {three.h}
\\prereq = {four.h}
);
try depTokenizer(
\\ascii.o: ascii.c
\\base64.o: base64.c stdio.h
\\elf.o: elf.c a.h b.h c.h
\\macho.o: \
\\ macho.c\
\\ a.h b.h c.h
,
\\target = {ascii.o}
\\prereq = {ascii.c}
\\target = {base64.o}
\\prereq = {base64.c}
\\prereq = {stdio.h}
\\target = {elf.o}
\\prereq = {elf.c}
\\prereq = {a.h}
\\prereq = {b.h}
\\prereq = {c.h}
\\target = {macho.o}
\\prereq = {macho.c}
\\prereq = {a.h}
\\prereq = {b.h}
\\prereq = {c.h}
);
try depTokenizer(
\\a$$scii.o: ascii.c
\\\\base64.o: "\base64.c" "s t#dio.h"
\\e\\lf.o: "e\lf.c" "a.h$$" "$$b.h c.h$$"
\\macho.o: \
\\ "macho!.c" \
\\ a.h b.h c.h
,
\\target = {a$scii.o}
\\prereq = {ascii.c}
\\target = {\base64.o}
\\prereq = {\base64.c}
\\prereq = {s t#dio.h}
\\target = {e\lf.o}
\\prereq = {e\lf.c}
\\prereq = {a.h$$}
\\prereq = {$$b.h c.h$$}
\\target = {macho.o}
\\prereq = {macho!.c}
\\prereq = {a.h}
\\prereq = {b.h}
\\prereq = {c.h}
);
}
// Windows drive-letter targets and quoted prerequisite paths containing spaces.
test "windows quoted prereqs" {
try depTokenizer(
\\c:\foo.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo.c"
\\c:\foo2.o: "C:\Program Files (x86)\Microsoft Visual Studio\foo2.c" \
\\ "C:\Program Files (x86)\Microsoft Visual Studio\foo1.h" \
\\ "C:\Program Files (x86)\Microsoft Visual Studio\foo2.h"
,
\\target = {c:\foo.o}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo.c}
\\target = {c:\foo2.o}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.c}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo1.h}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\foo2.h}
);
}
// A mix of unquoted and quoted Windows paths as prerequisites of a single target.
test "windows mixed prereqs" {
try depTokenizer(
\\cimport.o: \
\\ C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h" \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h" \
\\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h" \
\\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h" \
\\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h" \
\\ C:\msys64\opt\zig\lib\zig\include\vadefs.h \
\\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h" \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h" \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h" \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h" \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h" \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h" \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h" \
\\ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h" \
\\ "C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h"
,
\\target = {cimport.o}
\\prereq = {C:\msys64\home\anon\project\zig\master\zig-cache\o\qhvhbUo7GU5iKyQ5mpA8TcQpncCYaQu0wwvr3ybiSTj_Dtqi1Nmcb70kfODJ2Qlg\cimport.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\stdio.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt.h}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime.h}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\sal.h}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\concurrencysal.h}
\\prereq = {C:\msys64\opt\zig\lib\zig\include\vadefs.h}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vadefs.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstdio.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_stdio_config.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\string.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memory.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_memcpy_s.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\errno.h}
\\prereq = {C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.21.27702\lib\x64\\..\..\include\vcruntime_string.h}
\\prereq = {C:\Program Files (x86)\Windows Kits\10\\Include\10.0.17763.0\ucrt\corecrt_wstring.h}
);
}
// Windows-style targets that combine path backslashes with `\ `, `\#`, `\\` and `$$` escapes.
test "windows funky targets" {
try depTokenizer(
\\C:\Users\anon\foo.o:
\\C:\Users\anon\foo\ .o:
\\C:\Users\anon\foo\#.o:
\\C:\Users\anon\foo$$.o:
\\C:\Users\anon\\\ foo.o:
\\C:\Users\anon\\#foo.o:
\\C:\Users\anon\$$foo.o:
\\C:\Users\anon\\\ \ \ \ \ foo.o:
,
\\target = {C:\Users\anon\foo.o}
\\target = {C:\Users\anon\foo .o}
\\target = {C:\Users\anon\foo#.o}
\\target = {C:\Users\anon\foo$.o}
\\target = {C:\Users\anon\ foo.o}
\\target = {C:\Users\anon\#foo.o}
\\target = {C:\Users\anon\$foo.o}
\\target = {C:\Users\anon\ foo.o}
);
}
// Unquoted prerequisites that mix path backslashes, escaped spaces and line continuations.
test "windows funky prereqs" {
// Note we don't support unquoted escaped spaces at the very beginning of a relative path
// e.g. `\ SpaceAtTheBeginning.c`
// This typically wouldn't be seen in the wild, since depfiles usually use absolute paths
// and supporting it would degrade error messages for cases where it was meant to be a
// continuation, but the line ending is missing.
try depTokenizer(
\\cimport.o: \
\\ trailingbackslash\\
\\ C:\Users\John\ Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c \
\\ somedir\\ a.c\
\\ somedir/\ a.c\
\\ somedir\\ \ \ b.c\
\\ somedir\\ \\ \c.c\
\\
,
\\target = {cimport.o}
\\prereq = {trailingbackslash\}
\\prereq = {C:\Users\John Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c}
\\prereq = {somedir\ a.c}
\\prereq = {somedir/ a.c}
\\prereq = {somedir\ b.c}
\\prereq = {somedir\ \ \c.c}
);
}
// A drive letter followed by a forward slash is part of the path, not the end of the target.
test "windows drive and forward slashes" {
try depTokenizer(
\\C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj: \
\\ C:/msys64/opt/zig3/lib/zig/libc/mingw/crt/cxa_thread_atexit.c
,
\\target = {C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj}
\\prereq = {C:/msys64/opt/zig3/lib/zig/libc/mingw/crt/cxa_thread_atexit.c}
);
}
// Input ending right after a '\' in a target reports "incomplete escape" at the backslash.
test "error incomplete escape - reverse_solidus" {
try depTokenizer("\\",
\\ERROR: illegal char '\' at position 0: incomplete escape
);
try depTokenizer("\t\\",
\\ERROR: illegal char '\' at position 1: incomplete escape
);
try depTokenizer("\n\\",
\\ERROR: illegal char '\' at position 1: incomplete escape
);
try depTokenizer("\r\\",
\\ERROR: illegal char '\' at position 1: incomplete escape
);
try depTokenizer("\r\n\\",
\\ERROR: illegal char '\' at position 2: incomplete escape
);
try depTokenizer(" \\",
\\ERROR: illegal char '\' at position 1: incomplete escape
);
}
// Input ending right after a '$' in a target reports "incomplete escape" at the dollar sign.
test "error incomplete escape - dollar_sign" {
try depTokenizer("$",
\\ERROR: illegal char '$' at position 0: incomplete escape
);
try depTokenizer("\t$",
\\ERROR: illegal char '$' at position 1: incomplete escape
);
try depTokenizer("\n$",
\\ERROR: illegal char '$' at position 1: incomplete escape
);
try depTokenizer("\r$",
\\ERROR: illegal char '$' at position 1: incomplete escape
);
try depTokenizer("\r\n$",
\\ERROR: illegal char '$' at position 2: incomplete escape
);
try depTokenizer(" $",
\\ERROR: illegal char '$' at position 1: incomplete escape
);
}
// A target with no ':' before end of input is reported (with escapes resolved) at its start offset.
test "error incomplete target" {
try depTokenizer("foo.o",
\\ERROR: incomplete target 'foo.o' at position 0
);
try depTokenizer("\tfoo.o",
\\ERROR: incomplete target 'foo.o' at position 1
);
try depTokenizer("\nfoo.o",
\\ERROR: incomplete target 'foo.o' at position 1
);
try depTokenizer("\rfoo.o",
\\ERROR: incomplete target 'foo.o' at position 1
);
try depTokenizer("\r\nfoo.o",
\\ERROR: incomplete target 'foo.o' at position 2
);
try depTokenizer(" foo.o",
\\ERROR: incomplete target 'foo.o' at position 1
);
try depTokenizer("\\ foo.o",
\\ERROR: incomplete target ' foo.o' at position 0
);
try depTokenizer("\\#foo.o",
\\ERROR: incomplete target '#foo.o' at position 0
);
try depTokenizer("\\\\foo.o",
\\ERROR: incomplete target '\foo.o' at position 0
);
try depTokenizer("$$foo.o",
\\ERROR: incomplete target '$foo.o' at position 0
);
}
// A tab or line ending directly after '\' in a target is a bad escape.
test "error illegal char at position - bad target escape" {
try depTokenizer("\\\t",
\\ERROR: illegal char \x09 at position 1: bad target escape
);
try depTokenizer("\\\n",
\\ERROR: illegal char \x0A at position 1: bad target escape
);
try depTokenizer("\\\r",
\\ERROR: illegal char \x0D at position 1: bad target escape
);
try depTokenizer("\\\r\n",
\\ERROR: illegal char \x0D at position 1: bad target escape
);
}
// A '$' in a target must be followed by another '$'.
test "error illegal char at position - expecting dollar_sign" {
try depTokenizer("$\t",
\\ERROR: illegal char \x09 at position 1: expecting '$'
);
try depTokenizer("$\n",
\\ERROR: illegal char \x0A at position 1: expecting '$'
);
try depTokenizer("$\r",
\\ERROR: illegal char \x0D at position 1: expecting '$'
);
try depTokenizer("$\r\n",
\\ERROR: illegal char \x0D at position 1: expecting '$'
);
}
// A line ending in the middle of a target is rejected.
test "error illegal char at position - invalid target" {
try depTokenizer("foo\n.o",
\\ERROR: illegal char \x0A at position 3: invalid target
);
try depTokenizer("foo\r.o",
\\ERROR: illegal char \x0D at position 3: invalid target
);
try depTokenizer("foo\r\n.o",
\\ERROR: illegal char \x0D at position 3: invalid target
);
}
// After the target, a '\' between prerequisites must be followed by a line ending.
test "error target - continuation expecting end-of-line" {
try depTokenizer("foo.o: \\\t",
\\target = {foo.o}
\\ERROR: illegal char \x09 at position 8: continuation expecting end-of-line
);
try depTokenizer("foo.o: \\ ",
\\target = {foo.o}
\\ERROR: illegal char ' ' at position 8: continuation expecting end-of-line
);
try depTokenizer("foo.o: \\x",
\\target = {foo.o}
\\ERROR: illegal char 'x' at position 8: continuation expecting end-of-line
);
try depTokenizer("foo.o: \\\x0dx",
\\target = {foo.o}
\\ERROR: illegal char 'x' at position 9: continuation expecting end-of-line
);
}
// Inside a prerequisite, "\\\r" must be followed by '\n'.
test "error prereq - continuation expecting end-of-line" {
try depTokenizer("foo.o: foo.h\\\x0dx",
\\target = {foo.o}
\\ERROR: illegal char 'x' at position 14: continuation expecting end-of-line
);
}
// After a target ended by horizontal whitespace, only more such whitespace or ':' may follow.
test "error illegal char at position - expecting colon" {
try depTokenizer("foo\t.o:",
\\target = {foo}
\\ERROR: illegal char '.' at position 4: expecting ':'
);
try depTokenizer("foo .o:",
\\target = {foo}
\\ERROR: illegal char '.' at position 4: expecting ':'
);
try depTokenizer("foo \n.o:",
\\target = {foo}
\\ERROR: illegal char \x0A at position 4: expecting ':'
);
try depTokenizer("foo.o\t\n:",
\\target = {foo.o}
\\ERROR: illegal char \x0A at position 6: expecting ':'
);
}
// Test driver: tokenizes `input`, renders every token as one line of text
// (`target = {...}`, `prereq = {...}` or `ERROR: ...`, stopping after the first
// error) and compares the result with `expect`. On a mismatch the input, the
// expectation and the actual rendering are dumped to stderr and the test fails.
fn depTokenizer(input: []const u8, expect: []const u8) !void {
    var arena_state = std.heap.ArenaAllocator.init(std.testing.allocator);
    const arena = arena_state.allocator();
    defer arena_state.deinit();

    var tokenizer: Tokenizer = .{ .bytes = input };
    var rendered = std.ArrayList(u8).init(arena);
    var scratch = std.ArrayList(u8).init(arena);

    var emitted: usize = 0;
    while (tokenizer.next()) |token| : (emitted += 1) {
        // Tokens are separated by newlines, with none before the first.
        if (emitted != 0) try rendered.appendSlice("\n");
        switch (token) {
            .target, .prereq => |bytes| {
                try rendered.appendSlice(@tagName(token));
                try rendered.appendSlice(" = {");
                for (bytes) |byte| try rendered.append(printable_char_tab[byte]);
                try rendered.appendSlice("}");
            },
            .target_must_resolve, .prereq_must_resolve => {
                // Escaped tokens are shown under the plain tag, after resolution.
                const prefix: []const u8 = if (token == .target_must_resolve)
                    "target = {"
                else
                    "prereq = {";
                try rendered.appendSlice(prefix);
                try token.resolve(scratch.writer());
                for (scratch.items) |byte| try rendered.append(printable_char_tab[byte]);
                scratch.clearRetainingCapacity();
                try rendered.appendSlice("}");
            },
            else => {
                try rendered.appendSlice("ERROR: ");
                try token.printError(rendered.writer());
                break;
            },
        }
    }

    if (std.mem.eql(u8, expect, rendered.items)) return;

    const out = std.io.getStdErr().writer();
    try out.writeAll("\n");
    try printSection(out, "<<<< input", input);
    try printSection(out, "==== expect", expect);
    try printSection(out, ">>>> got", rendered.items);
    try printRuler(out);
    try testing.expect(false);
}
/// Prints one diagnostic section for `bytes`: a label line, a hex dump,
/// a ruler, and finally the raw bytes followed by a newline.
fn printSection(out: anytype, label: []const u8, bytes: []const u8) !void {
    try printLabel(out, label, bytes);
    try hexDump(out, bytes);
    try printRuler(out);
    try out.print("{s}\n", .{bytes});
}
/// Prints a heading of the form "<label> <len> bytes ", padded to 79 columns
/// with the first byte of `label`, followed by a newline.
fn printLabel(out: anytype, label: []const u8, bytes: []const u8) !void {
    const total_width = 79;
    var scratch: [80]u8 = undefined;
    const heading = try std.fmt.bufPrint(&scratch, "{s} {d} bytes ", .{ label, bytes.len });
    try out.writeAll(heading);

    // Pad up to the full width; nothing is added if the heading is already that long.
    var remaining: usize = total_width -| heading.len;
    while (remaining > 0) : (remaining -= 1) {
        try out.writeAll(&[_]u8{label[0]});
    }
    try out.writeAll("\n");
}
/// Prints a horizontal ruler: 79 dashes followed by a newline.
fn printRuler(out: anytype) !void {
    const width = 79;
    for (0..width) |_| {
        try out.writeAll("-");
    }
    try out.writeAll("\n");
}
/// Writes a hex dump of `bytes` to `out`: one row per complete group of 16
/// bytes (via `hexDump16`), then a partial row for any remaining bytes, and
/// finally a line holding only the total byte offset.
fn hexDump(out: anytype, bytes: []const u8) !void {
// Number of complete 16-byte rows.
const n16 = bytes.len >> 4;
var line: usize = 0;
var offset: usize = 0;
while (line < n16) : (line += 1) {
try hexDump16(out, offset, bytes[offset..][0..16]);
offset += 16;
}
// Number of bytes left over for the partial last row (0..15).
const n = bytes.len & 0x0f;
if (n > 0) {
try printDecValue(out, offset, 8);
try out.writeAll(":");
try out.writeAll(" ");
// First group: at most 8 of the remaining bytes.
const end1 = @min(offset + n, offset + 8);
for (bytes[offset..end1]) |b| {
try out.writeAll(" ");
try printHexValue(out, b, 2);
}
// Second group: whatever remains beyond the first 8.
const end2 = offset + n;
if (end2 > end1) {
try out.writeAll(" ");
for (bytes[end1..end2]) |b| {
try out.writeAll(" ");
try printHexValue(out, b, 2);
}
}
// Pad once for every byte missing from a full row of 16.
const short = 16 - n;
var i: usize = 0;
while (i < short) : (i += 1) {
try out.writeAll(" ");
}
// NOTE(review): both branches write the same literal as shown here; confirm
// whether they were meant to differ in padding.
if (end2 > end1) {
try out.writeAll(" |");
} else {
try out.writeAll(" |");
}
try printCharValues(out, bytes[offset..end2]);
try out.writeAll("|\n");
offset += n;
}
// Closing line: the total number of bytes dumped.
try printDecValue(out, offset, 8);
try out.writeAll(":");
try out.writeAll("\n");
}
/// Writes one full hex-dump row for 16 bytes: the zero-padded decimal
/// `offset`, the bytes in hex as two groups of eight, and their printable
/// rendering between '|' characters.
fn hexDump16(out: anytype, offset: usize, bytes: []const u8) !void {
    try printDecValue(out, offset, 8);
    try out.writeAll(":");
    try out.writeAll(" ");
    for (bytes[0..16], 0..) |byte, column| {
        // Extra separator between the two groups of eight.
        if (column == 8) try out.writeAll(" ");
        try out.writeAll(" ");
        try printHexValue(out, byte, 2);
    }
    try out.writeAll(" |");
    try printCharValues(out, bytes);
    try out.writeAll("|\n");
}
/// Writes `value` to `out` in decimal, padded on the left with '0' to at
/// least `width` characters.
fn printDecValue(out: anytype, value: u64, width: u8) !void {
    // Format straight into the writer instead of a fixed scratch buffer, so a
    // `width` larger than that buffer can no longer overflow it.
    try std.fmt.formatInt(value, 10, .lower, .{ .width = width, .fill = '0' }, out);
}
/// Writes `value` to `out` in lowercase hexadecimal, padded on the left with
/// '0' to at least `width` characters.
fn printHexValue(out: anytype, value: u64, width: u8) !void {
    // Format straight into the writer instead of a fixed scratch buffer, so a
    // `width` larger than that buffer can no longer overflow it.
    try std.fmt.formatInt(value, 16, .lower, .{ .width = width, .fill = '0' }, out);
}
/// Writes each byte of `bytes` to `out` after mapping it through
/// `printable_char_tab`.
fn printCharValues(out: anytype, bytes: []const u8) !void {
    for (bytes) |byte| {
        const shown: u8 = printable_char_tab[byte];
        try out.writeByte(shown);
    }
}
/// Writes `char` in a readable form: quoted (`'x'`) when it is printable
/// ASCII, otherwise as a two-digit uppercase hex escape (`\x0A`).
fn printUnderstandableChar(out: anytype, char: u8) !void {
    if (!std.ascii.isPrint(char)) {
        return out.print("\\x{X:0>2}", .{char});
    }
    return out.print("'{c}'", .{char});
}
// zig fmt: off
// Lookup table indexed by byte value: the printable ASCII characters
// (' ' through '~') map to themselves, every other byte maps to '.'.
const printable_char_tab: [256]u8 = (
"................................ !\"#$%&'()*+,-./0123456789:;<=>?" ++
"@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~." ++
"................................................................" ++
"................................................................"
).*;