diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp index db3a6cc6fe..6f825a131b 100644 --- a/common/chat-auto-parser-generator.cpp +++ b/common/chat-auto-parser-generator.cpp @@ -134,7 +134,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs, cons auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema))); parser = ctx.reasoning_parser + p.space() + p.choice({ p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"), - response_format + p.space() + response_format + p.space() }) + p.end(); pure_content = false; } else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) { @@ -393,8 +393,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte (schema_info.resolves_to_string(param_schema) ? p.tool_arg_string_value(until_suffix) : p.tool_arg_json_value(p.schema( - p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) + - p.space()) + + p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false))) + p.tool_arg_close(p.literal(arguments.value_suffix))); auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg); diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp index 0875c5347f..ecd9c807c1 100644 --- a/common/chat-diff-analyzer.cpp +++ b/common/chat-diff-analyzer.cpp @@ -1229,8 +1229,8 @@ void analyze_tools::extract_argument_name_markers() { left_result.tags["pre"] == right_result.tags["pre"] && left_result.tags["suffix"] == right_result.tags["suffix"]) { // Name is inside a structure (e.g., JSON key): prefix is the shared wrapper - arguments.name_prefix = trim_whitespace(left_result.tags["pre"]); - arguments.name_suffix = trim_leading_whitespace(left_result.tags["suffix"]); + arguments.name_prefix = left_result.tags["pre"]; + arguments.name_suffix = left_result.tags["suffix"]; } else if (diff.left.substr(0, ARG_FIRST.length()) == ARG_FIRST && diff.right.substr(0, ARG_SECOND.length()) == ARG_SECOND) { // Name is directly in the diff: prefix comes from last marker in diff.prefix auto pre_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) { @@ -1315,8 +1315,7 @@ void analyze_tools::extract_argument_value_markers() { value_suffix = value_suffix.substr(0, end_marker_pos); } } - value_suffix = trim_leading_whitespace(value_suffix); - if (!value_suffix.empty()) { + if (!trim_whitespace(value_suffix).empty()) { arguments.value_suffix = value_suffix; } } diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp index 9bc5ac98be..23b5b38412 100644 --- a/common/chat-peg-parser.cpp +++ b/common/chat-peg-parser.cpp @@ -363,7 +363,7 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) { } if ((is_arg_value || is_arg_string_value) && current_tool) { - std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1)); + std::string value_content = std::string(node.text); std::string value_to_add; if (value_content.empty() && is_arg_string_value) { diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp index e37c1ce80e..310bebf735 100644 --- a/common/peg-parser.cpp +++ b/common/peg-parser.cpp @@ -1272,13 +1272,13 @@ common_peg_parser common_peg_parser_builder::string_content(char delimiter) { common_peg_parser common_peg_parser_builder::double_quoted_string() { return rule("double-quoted-string", [this]() { - return sequence({literal("\""), string_content('"'), literal("\""), space()}); + return sequence({literal("\""), string_content('"'), literal("\"")}); }); } common_peg_parser common_peg_parser_builder::single_quoted_string() { return rule("single-quoted-string", [this]() { - return sequence({literal("'"), string_content('\''), literal("'"), space()}); + return sequence({literal("'"), string_content('\''), literal("'")}); }); } @@ -1301,25 +1301,25 @@ common_peg_parser common_peg_parser_builder::json_number() { // At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed. // This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming). auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1)); - return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() }); + return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation }); }); } common_peg_parser common_peg_parser_builder::json_string() { return rule("json-string", [this]() { - return sequence({literal("\""), string_content('"'), literal("\""), space()}); + return sequence({literal("\""), string_content('"'), literal("\"")}); }); } common_peg_parser common_peg_parser_builder::json_bool() { return rule("json-bool", [this]() { - return sequence({choice({literal("true"), literal("false")}), space()}); + return choice({literal("true"), literal("false")}); }); } common_peg_parser common_peg_parser_builder::json_null() { return rule("json-null", [this]() { - return sequence({literal("null"), space()}); + return literal("null"); }); } @@ -1334,8 +1334,7 @@ common_peg_parser common_peg_parser_builder::json_object() { choice({ literal("}"), sequence({members, ws, literal("}")}) - }), - ws + }) }); }); } @@ -1350,8 +1349,7 @@ common_peg_parser common_peg_parser_builder::json_array() { choice({ literal("]"), sequence({elements, ws, literal("]")}) - }), - ws + }) }); }); } @@ -1381,16 +1379,13 @@ common_peg_parser common_peg_parser_builder::python_number() { common_peg_parser common_peg_parser_builder::python_bool() { return rule("python-bool", [this]() { - return sequence({ - choice({literal("True"), literal("False")}), - space() - }); + return choice({literal("True"), literal("False")}); }); } common_peg_parser common_peg_parser_builder::python_null() { return rule("python-none", [this]() { - return sequence({literal("None"), space()}); + return literal("None"); }); } diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index 6f8e957489..5cc1057532 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -1369,7 +1369,7 @@ static void test_nemotron_tool_format(testing & t) { // Check argument markers (note: markers retain trailing newlines for proper parsing) t.assert_equal("arg_name_prefix should be '\\n'", ">\n", analysis.tools.arguments.name_suffix); - t.assert_equal("arg_value_suffix should be '\\n'", "\n", analysis.tools.arguments.value_suffix); + t.assert_equal("arg_value_suffix should be '\\n\\n'", "\n\n", analysis.tools.arguments.value_suffix); // Check format classification t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED); @@ -2030,12 +2030,11 @@ static void test_tagged_args_with_embedded_quotes(testing & t) { return p.content(p.until("")) + p.optional(tool_section) + p.end(); }); - // The exact input from the failing test std::string input = "\n" "\n" - "\n" - "foo.cpp\n" + "" + "foo.cpp" "\n" "" "def foo(arg = \"14\"):\n" diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp index 29fd3db823..e65af46c63 100644 --- a/tests/test-chat.cpp +++ b/tests/test-chat.cpp @@ -1935,6 +1935,10 @@ static void test_template_output_peg_parsers(bool detailed_debug) { } })"; + const char * const_schema = R"({ + "const": "42" + })"; + { // Qwen3.5 (basically same as Nemotron, but keeping separate tests just in case) auto tst = peg_tester("models/templates/Qwen3.5-4B.jinja", detailed_debug); @@ -2020,6 +2024,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) { }) .run(); + // test code that starts with indent + tst.test( + "\n" + "\n" + "\n" + " print(\"Hello, world!\")\n" + "\n" + "\n" + "") + .enable_thinking(false) + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .tools({ + python_tool + }) + .expect_tool_calls({ + { "python", "{\"code\": \" print(\\\"Hello, world!\\\")\"}", {} }, + }) + .run(); + tst.test( "I need to output the invoice details in JSON\n" "\n\n" @@ -3196,18 +3219,16 @@ static void test_template_output_peg_parsers(bool detailed_debug) { tst.test( "\n" "\n" - "\n" - "foo.cpp\n" + "" + "foo.cpp" "\n" "" "def foo(arg = \"14\"):\n" " return arg + \"bar\"\n" - "\n" "\n" "" "def foo(arg = \"15\"):\n" " pass\n" - "\n" "\n" "\n" "") @@ -4927,6 +4948,20 @@ static void test_template_output_peg_parsers(bool detailed_debug) { auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug); tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).expect_reconstruction().run(); + tst.test( + "```json\n\"42\" \n```") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .json_schema(const_schema) + .expect_content(R"("42")") + .run(); + + tst.test( + "\"42\" \n") + .reasoning_format(COMMON_REASONING_FORMAT_AUTO) + .json_schema(const_schema) + .expect_content(R"("42")") + .run(); + // Continuation tests tst.test("world!\nWhat's up?") .messages({ message_user, message_assist_prefill_content })