chat: fix whitespace problems once and for all (#24624)

* chat: fix whitespace problems once and for all

* Purge trailing spaces from grammar generation

* Revert "Purge trailing spaces from grammar generation"

This reverts commit b0827ecb7d.
This commit is contained in:
Piotr Wilkin (ilintar)
2026-06-15 08:27:10 +02:00
committed by GitHub
parent 2a6c391a5e
commit a6dff71270
6 changed files with 58 additions and 31 deletions
+2 -3
View File
@@ -134,7 +134,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs, cons
auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
parser = ctx.reasoning_parser + p.space() + p.choice({
p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
response_format
p.space() + response_format + p.space()
}) + p.end();
pure_content = false;
} else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
@@ -393,8 +393,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
(schema_info.resolves_to_string(param_schema) ?
p.tool_arg_string_value(until_suffix) :
p.tool_arg_json_value(p.schema(
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
p.space()) +
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false))) +
p.tool_arg_close(p.literal(arguments.value_suffix)));
auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
+3 -4
View File
@@ -1229,8 +1229,8 @@ void analyze_tools::extract_argument_name_markers() {
left_result.tags["pre"] == right_result.tags["pre"] &&
left_result.tags["suffix"] == right_result.tags["suffix"]) {
// Name is inside a structure (e.g., JSON key): prefix is the shared wrapper
arguments.name_prefix = trim_whitespace(left_result.tags["pre"]);
arguments.name_suffix = trim_leading_whitespace(left_result.tags["suffix"]);
arguments.name_prefix = left_result.tags["pre"];
arguments.name_suffix = left_result.tags["suffix"];
} else if (diff.left.substr(0, ARG_FIRST.length()) == ARG_FIRST && diff.right.substr(0, ARG_SECOND.length()) == ARG_SECOND) {
// Name is directly in the diff: prefix comes from last marker in diff.prefix
auto pre_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
@@ -1315,8 +1315,7 @@ void analyze_tools::extract_argument_value_markers() {
value_suffix = value_suffix.substr(0, end_marker_pos);
}
}
value_suffix = trim_leading_whitespace(value_suffix);
if (!value_suffix.empty()) {
if (!trim_whitespace(value_suffix).empty()) {
arguments.value_suffix = value_suffix;
}
}
+1 -1
View File
@@ -363,7 +363,7 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
}
if ((is_arg_value || is_arg_string_value) && current_tool) {
std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
std::string value_content = std::string(node.text);
std::string value_to_add;
if (value_content.empty() && is_arg_string_value) {
+10 -15
View File
@@ -1272,13 +1272,13 @@ common_peg_parser common_peg_parser_builder::string_content(char delimiter) {
common_peg_parser common_peg_parser_builder::double_quoted_string() {
return rule("double-quoted-string", [this]() {
return sequence({literal("\""), string_content('"'), literal("\""), space()});
return sequence({literal("\""), string_content('"'), literal("\"")});
});
}
common_peg_parser common_peg_parser_builder::single_quoted_string() {
return rule("single-quoted-string", [this]() {
return sequence({literal("'"), string_content('\''), literal("'"), space()});
return sequence({literal("'"), string_content('\''), literal("'")});
});
}
@@ -1301,25 +1301,25 @@ common_peg_parser common_peg_parser_builder::json_number() {
// At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed.
// This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming).
auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1));
return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() });
return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation });
});
}
common_peg_parser common_peg_parser_builder::json_string() {
return rule("json-string", [this]() {
return sequence({literal("\""), string_content('"'), literal("\""), space()});
return sequence({literal("\""), string_content('"'), literal("\"")});
});
}
common_peg_parser common_peg_parser_builder::json_bool() {
return rule("json-bool", [this]() {
return sequence({choice({literal("true"), literal("false")}), space()});
return choice({literal("true"), literal("false")});
});
}
common_peg_parser common_peg_parser_builder::json_null() {
return rule("json-null", [this]() {
return sequence({literal("null"), space()});
return literal("null");
});
}
@@ -1334,8 +1334,7 @@ common_peg_parser common_peg_parser_builder::json_object() {
choice({
literal("}"),
sequence({members, ws, literal("}")})
}),
ws
})
});
});
}
@@ -1350,8 +1349,7 @@ common_peg_parser common_peg_parser_builder::json_array() {
choice({
literal("]"),
sequence({elements, ws, literal("]")})
}),
ws
})
});
});
}
@@ -1381,16 +1379,13 @@ common_peg_parser common_peg_parser_builder::python_number() {
common_peg_parser common_peg_parser_builder::python_bool() {
return rule("python-bool", [this]() {
return sequence({
choice({literal("True"), literal("False")}),
space()
});
return choice({literal("True"), literal("False")});
});
}
common_peg_parser common_peg_parser_builder::python_null() {
return rule("python-none", [this]() {
return sequence({literal("None"), space()});
return literal("None");
});
}
+3 -4
View File
@@ -1369,7 +1369,7 @@ static void test_nemotron_tool_format(testing & t) {
// Check argument markers (note: markers retain trailing newlines for proper parsing)
t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", analysis.tools.arguments.name_prefix);
t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", analysis.tools.arguments.name_suffix);
t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", analysis.tools.arguments.value_suffix);
t.assert_equal("arg_value_suffix should be '\\n</parameter>\\n'", "\n</parameter>\n", analysis.tools.arguments.value_suffix);
// Check format classification
t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED);
@@ -2030,12 +2030,11 @@ static void test_tagged_args_with_embedded_quotes(testing & t) {
return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();
});
// The exact input from the failing test
std::string input =
"<seed:tool_call>\n"
"<function=edit>\n"
"<parameter=filename>\n"
"foo.cpp\n"
"<parameter=filename>"
"foo.cpp"
"</parameter>\n"
"<parameter=oldString>"
"def foo(arg = \"14\"):\n"
+39 -4
View File
@@ -1935,6 +1935,10 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
}
})";
const char * const_schema = R"({
"const": "42"
})";
{
// Qwen3.5 (basically same as Nemotron, but keeping separate tests just in case)
auto tst = peg_tester("models/templates/Qwen3.5-4B.jinja", detailed_debug);
@@ -2020,6 +2024,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
})
.run();
// test code that starts with indent
tst.test(
"<tool_call>\n"
"<function=python>\n"
"<parameter=code>\n"
" print(\"Hello, world!\")\n"
"</parameter>\n"
"</function>\n"
"</tool_call>")
.enable_thinking(false)
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.tools({
python_tool
})
.expect_tool_calls({
{ "python", "{\"code\": \" print(\\\"Hello, world!\\\")\"}", {} },
})
.run();
tst.test(
"I need to output the invoice details in JSON\n"
"</think>\n\n"
@@ -3196,18 +3219,16 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
tst.test(
"<seed:tool_call>\n"
"<function=edit>\n"
"<parameter=filename>\n"
"foo.cpp\n"
"<parameter=filename>"
"foo.cpp"
"</parameter>\n"
"<parameter=oldString>"
"def foo(arg = \"14\"):\n"
" return arg + \"bar\"\n"
"\n"
"</parameter>\n"
"<parameter=newString>"
"def foo(arg = \"15\"):\n"
" pass\n"
"\n"
"</parameter>\n"
"</function>\n"
"</seed:tool_call>")
@@ -4927,6 +4948,20 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).expect_reconstruction().run();
tst.test(
"```json\n\"42\" \n```")
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.json_schema(const_schema)
.expect_content(R"("42")")
.run();
tst.test(
"\"42\" \n")
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.json_schema(const_schema)
.expect_content(R"("42")")
.run();
// Continuation tests
tst.test("world!\nWhat's up?")
.messages({ message_user, message_assist_prefill_content })