mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-26 06:10:19 +00:00
chat: fix whitespace problems once and for all (#24624)
* chat: fix whitespace problems once and for all
* Purge trailing spaces from grammar generation
* Revert "Purge trailing spaces from grammar generation"
This reverts commit b0827ecb7d.
This commit is contained in:
committed by
GitHub
parent
2a6c391a5e
commit
a6dff71270
@@ -134,7 +134,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs, cons
|
||||
auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
|
||||
parser = ctx.reasoning_parser + p.space() + p.choice({
|
||||
p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
|
||||
response_format
|
||||
p.space() + response_format + p.space()
|
||||
}) + p.end();
|
||||
pure_content = false;
|
||||
} else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
|
||||
@@ -393,8 +393,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
|
||||
(schema_info.resolves_to_string(param_schema) ?
|
||||
p.tool_arg_string_value(until_suffix) :
|
||||
p.tool_arg_json_value(p.schema(
|
||||
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
|
||||
p.space()) +
|
||||
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false))) +
|
||||
p.tool_arg_close(p.literal(arguments.value_suffix)));
|
||||
|
||||
auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
|
||||
|
||||
@@ -1229,8 +1229,8 @@ void analyze_tools::extract_argument_name_markers() {
|
||||
left_result.tags["pre"] == right_result.tags["pre"] &&
|
||||
left_result.tags["suffix"] == right_result.tags["suffix"]) {
|
||||
// Name is inside a structure (e.g., JSON key): prefix is the shared wrapper
|
||||
arguments.name_prefix = trim_whitespace(left_result.tags["pre"]);
|
||||
arguments.name_suffix = trim_leading_whitespace(left_result.tags["suffix"]);
|
||||
arguments.name_prefix = left_result.tags["pre"];
|
||||
arguments.name_suffix = left_result.tags["suffix"];
|
||||
} else if (diff.left.substr(0, ARG_FIRST.length()) == ARG_FIRST && diff.right.substr(0, ARG_SECOND.length()) == ARG_SECOND) {
|
||||
// Name is directly in the diff: prefix comes from last marker in diff.prefix
|
||||
auto pre_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
|
||||
@@ -1315,8 +1315,7 @@ void analyze_tools::extract_argument_value_markers() {
|
||||
value_suffix = value_suffix.substr(0, end_marker_pos);
|
||||
}
|
||||
}
|
||||
value_suffix = trim_leading_whitespace(value_suffix);
|
||||
if (!value_suffix.empty()) {
|
||||
if (!trim_whitespace(value_suffix).empty()) {
|
||||
arguments.value_suffix = value_suffix;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -363,7 +363,7 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
|
||||
}
|
||||
|
||||
if ((is_arg_value || is_arg_string_value) && current_tool) {
|
||||
std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
|
||||
std::string value_content = std::string(node.text);
|
||||
|
||||
std::string value_to_add;
|
||||
if (value_content.empty() && is_arg_string_value) {
|
||||
|
||||
+10
-15
@@ -1272,13 +1272,13 @@ common_peg_parser common_peg_parser_builder::string_content(char delimiter) {
|
||||
|
||||
common_peg_parser common_peg_parser_builder::double_quoted_string() {
|
||||
return rule("double-quoted-string", [this]() {
|
||||
return sequence({literal("\""), string_content('"'), literal("\""), space()});
|
||||
return sequence({literal("\""), string_content('"'), literal("\"")});
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::single_quoted_string() {
|
||||
return rule("single-quoted-string", [this]() {
|
||||
return sequence({literal("'"), string_content('\''), literal("'"), space()});
|
||||
return sequence({literal("'"), string_content('\''), literal("'")});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1301,25 +1301,25 @@ common_peg_parser common_peg_parser_builder::json_number() {
|
||||
// At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed.
|
||||
// This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming).
|
||||
auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1));
|
||||
return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() });
|
||||
return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation });
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::json_string() {
|
||||
return rule("json-string", [this]() {
|
||||
return sequence({literal("\""), string_content('"'), literal("\""), space()});
|
||||
return sequence({literal("\""), string_content('"'), literal("\"")});
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::json_bool() {
|
||||
return rule("json-bool", [this]() {
|
||||
return sequence({choice({literal("true"), literal("false")}), space()});
|
||||
return choice({literal("true"), literal("false")});
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::json_null() {
|
||||
return rule("json-null", [this]() {
|
||||
return sequence({literal("null"), space()});
|
||||
return literal("null");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1334,8 +1334,7 @@ common_peg_parser common_peg_parser_builder::json_object() {
|
||||
choice({
|
||||
literal("}"),
|
||||
sequence({members, ws, literal("}")})
|
||||
}),
|
||||
ws
|
||||
})
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -1350,8 +1349,7 @@ common_peg_parser common_peg_parser_builder::json_array() {
|
||||
choice({
|
||||
literal("]"),
|
||||
sequence({elements, ws, literal("]")})
|
||||
}),
|
||||
ws
|
||||
})
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -1381,16 +1379,13 @@ common_peg_parser common_peg_parser_builder::python_number() {
|
||||
|
||||
common_peg_parser common_peg_parser_builder::python_bool() {
|
||||
return rule("python-bool", [this]() {
|
||||
return sequence({
|
||||
choice({literal("True"), literal("False")}),
|
||||
space()
|
||||
});
|
||||
return choice({literal("True"), literal("False")});
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::python_null() {
|
||||
return rule("python-none", [this]() {
|
||||
return sequence({literal("None"), space()});
|
||||
return literal("None");
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -1369,7 +1369,7 @@ static void test_nemotron_tool_format(testing & t) {
|
||||
// Check argument markers (note: markers retain trailing newlines for proper parsing)
|
||||
t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", analysis.tools.arguments.name_prefix);
|
||||
t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", analysis.tools.arguments.name_suffix);
|
||||
t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", analysis.tools.arguments.value_suffix);
|
||||
t.assert_equal("arg_value_suffix should be '\\n</parameter>\\n'", "\n</parameter>\n", analysis.tools.arguments.value_suffix);
|
||||
|
||||
// Check format classification
|
||||
t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED);
|
||||
@@ -2030,12 +2030,11 @@ static void test_tagged_args_with_embedded_quotes(testing & t) {
|
||||
return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();
|
||||
});
|
||||
|
||||
// The exact input from the failing test
|
||||
std::string input =
|
||||
"<seed:tool_call>\n"
|
||||
"<function=edit>\n"
|
||||
"<parameter=filename>\n"
|
||||
"foo.cpp\n"
|
||||
"<parameter=filename>"
|
||||
"foo.cpp"
|
||||
"</parameter>\n"
|
||||
"<parameter=oldString>"
|
||||
"def foo(arg = \"14\"):\n"
|
||||
|
||||
+39
-4
@@ -1935,6 +1935,10 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
}
|
||||
})";
|
||||
|
||||
const char * const_schema = R"({
|
||||
"const": "42"
|
||||
})";
|
||||
|
||||
{
|
||||
// Qwen3.5 (basically same as Nemotron, but keeping separate tests just in case)
|
||||
auto tst = peg_tester("models/templates/Qwen3.5-4B.jinja", detailed_debug);
|
||||
@@ -2020,6 +2024,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
})
|
||||
.run();
|
||||
|
||||
// test code that starts with indent
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=python>\n"
|
||||
"<parameter=code>\n"
|
||||
" print(\"Hello, world!\")\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.enable_thinking(false)
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.tools({
|
||||
python_tool
|
||||
})
|
||||
.expect_tool_calls({
|
||||
{ "python", "{\"code\": \" print(\\\"Hello, world!\\\")\"}", {} },
|
||||
})
|
||||
.run();
|
||||
|
||||
tst.test(
|
||||
"I need to output the invoice details in JSON\n"
|
||||
"</think>\n\n"
|
||||
@@ -3196,18 +3219,16 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
tst.test(
|
||||
"<seed:tool_call>\n"
|
||||
"<function=edit>\n"
|
||||
"<parameter=filename>\n"
|
||||
"foo.cpp\n"
|
||||
"<parameter=filename>"
|
||||
"foo.cpp"
|
||||
"</parameter>\n"
|
||||
"<parameter=oldString>"
|
||||
"def foo(arg = \"14\"):\n"
|
||||
" return arg + \"bar\"\n"
|
||||
"\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=newString>"
|
||||
"def foo(arg = \"15\"):\n"
|
||||
" pass\n"
|
||||
"\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</seed:tool_call>")
|
||||
@@ -4927,6 +4948,20 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
|
||||
tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).expect_reconstruction().run();
|
||||
|
||||
tst.test(
|
||||
"```json\n\"42\" \n```")
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.json_schema(const_schema)
|
||||
.expect_content(R"("42")")
|
||||
.run();
|
||||
|
||||
tst.test(
|
||||
"\"42\" \n")
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.json_schema(const_schema)
|
||||
.expect_content(R"("42")")
|
||||
.run();
|
||||
|
||||
// Continuation tests
|
||||
tst.test("world!\nWhat's up?")
|
||||
.messages({ message_user, message_assist_prefill_content })
|
||||
|
||||
Reference in New Issue
Block a user