diff --git a/common/chat-auto-parser-generator.cpp b/common/chat-auto-parser-generator.cpp
index db3a6cc6fe..6f825a131b 100644
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@@ -134,7 +134,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs, cons
auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
parser = ctx.reasoning_parser + p.space() + p.choice({
p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
- response_format
+ p.space() + response_format + p.space()
}) + p.end();
pure_content = false;
} else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
@@ -393,8 +393,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
(schema_info.resolves_to_string(param_schema) ?
p.tool_arg_string_value(until_suffix) :
p.tool_arg_json_value(p.schema(
- p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
- p.space()) +
+ p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false))) +
p.tool_arg_close(p.literal(arguments.value_suffix)));
auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
diff --git a/common/chat-diff-analyzer.cpp b/common/chat-diff-analyzer.cpp
index 0875c5347f..ecd9c807c1 100644
--- a/common/chat-diff-analyzer.cpp
+++ b/common/chat-diff-analyzer.cpp
@@ -1229,8 +1229,8 @@ void analyze_tools::extract_argument_name_markers() {
left_result.tags["pre"] == right_result.tags["pre"] &&
left_result.tags["suffix"] == right_result.tags["suffix"]) {
// Name is inside a structure (e.g., JSON key): prefix is the shared wrapper
- arguments.name_prefix = trim_whitespace(left_result.tags["pre"]);
- arguments.name_suffix = trim_leading_whitespace(left_result.tags["suffix"]);
+ arguments.name_prefix = left_result.tags["pre"];
+ arguments.name_suffix = left_result.tags["suffix"];
} else if (diff.left.substr(0, ARG_FIRST.length()) == ARG_FIRST && diff.right.substr(0, ARG_SECOND.length()) == ARG_SECOND) {
// Name is directly in the diff: prefix comes from last marker in diff.prefix
auto pre_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
@@ -1315,8 +1315,7 @@ void analyze_tools::extract_argument_value_markers() {
value_suffix = value_suffix.substr(0, end_marker_pos);
}
}
- value_suffix = trim_leading_whitespace(value_suffix);
- if (!value_suffix.empty()) {
+ if (!trim_whitespace(value_suffix).empty()) {
arguments.value_suffix = value_suffix;
}
}
diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp
index 9bc5ac98be..23b5b38412 100644
--- a/common/chat-peg-parser.cpp
+++ b/common/chat-peg-parser.cpp
@@ -363,7 +363,7 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
}
if ((is_arg_value || is_arg_string_value) && current_tool) {
- std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
+ std::string value_content = std::string(node.text);
std::string value_to_add;
if (value_content.empty() && is_arg_string_value) {
diff --git a/common/peg-parser.cpp b/common/peg-parser.cpp
index e37c1ce80e..310bebf735 100644
--- a/common/peg-parser.cpp
+++ b/common/peg-parser.cpp
@@ -1272,13 +1272,13 @@ common_peg_parser common_peg_parser_builder::string_content(char delimiter) {
common_peg_parser common_peg_parser_builder::double_quoted_string() {
return rule("double-quoted-string", [this]() {
- return sequence({literal("\""), string_content('"'), literal("\""), space()});
+ return sequence({literal("\""), string_content('"'), literal("\"")});
});
}
common_peg_parser common_peg_parser_builder::single_quoted_string() {
return rule("single-quoted-string", [this]() {
- return sequence({literal("'"), string_content('\''), literal("'"), space()});
+ return sequence({literal("'"), string_content('\''), literal("'")});
});
}
@@ -1301,25 +1301,25 @@ common_peg_parser common_peg_parser_builder::json_number() {
// At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed.
// This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming).
auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1));
- return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() });
+ return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation });
});
}
common_peg_parser common_peg_parser_builder::json_string() {
return rule("json-string", [this]() {
- return sequence({literal("\""), string_content('"'), literal("\""), space()});
+ return sequence({literal("\""), string_content('"'), literal("\"")});
});
}
common_peg_parser common_peg_parser_builder::json_bool() {
return rule("json-bool", [this]() {
- return sequence({choice({literal("true"), literal("false")}), space()});
+ return choice({literal("true"), literal("false")});
});
}
common_peg_parser common_peg_parser_builder::json_null() {
return rule("json-null", [this]() {
- return sequence({literal("null"), space()});
+ return literal("null");
});
}
@@ -1334,8 +1334,7 @@ common_peg_parser common_peg_parser_builder::json_object() {
choice({
literal("}"),
sequence({members, ws, literal("}")})
- }),
- ws
+ })
});
});
}
@@ -1350,8 +1349,7 @@ common_peg_parser common_peg_parser_builder::json_array() {
choice({
literal("]"),
sequence({elements, ws, literal("]")})
- }),
- ws
+ })
});
});
}
@@ -1381,16 +1379,13 @@ common_peg_parser common_peg_parser_builder::python_number() {
common_peg_parser common_peg_parser_builder::python_bool() {
return rule("python-bool", [this]() {
- return sequence({
- choice({literal("True"), literal("False")}),
- space()
- });
+ return choice({literal("True"), literal("False")});
});
}
common_peg_parser common_peg_parser_builder::python_null() {
return rule("python-none", [this]() {
- return sequence({literal("None"), space()});
+ return literal("None");
});
}
diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp
index 6f8e957489..5cc1057532 100644
--- a/tests/test-chat-auto-parser.cpp
+++ b/tests/test-chat-auto-parser.cpp
@@ -1369,7 +1369,7 @@ static void test_nemotron_tool_format(testing & t) {
// Check argument markers (note: markers retain trailing newlines for proper parsing)
t.assert_equal("arg_name_prefix should be '\\n'", ">\n", analysis.tools.arguments.name_suffix);
- t.assert_equal("arg_value_suffix should be '\\n'", "\n", analysis.tools.arguments.value_suffix);
+ t.assert_equal("arg_value_suffix should be '\\n\\n'", "\n\n", analysis.tools.arguments.value_suffix);
// Check format classification
t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED);
@@ -2030,12 +2030,11 @@ static void test_tagged_args_with_embedded_quotes(testing & t) {
return p.content(p.until("")) + p.optional(tool_section) + p.end();
});
- // The exact input from the failing test
std::string input =
"\n"
"\n"
- "\n"
- "foo.cpp\n"
+ ""
+ "foo.cpp"
"\n"
""
"def foo(arg = \"14\"):\n"
diff --git a/tests/test-chat.cpp b/tests/test-chat.cpp
index 29fd3db823..e65af46c63 100644
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -1935,6 +1935,10 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
}
})";
+ const char * const_schema = R"({
+ "const": "42"
+ })";
+
{
// Qwen3.5 (basically same as Nemotron, but keeping separate tests just in case)
auto tst = peg_tester("models/templates/Qwen3.5-4B.jinja", detailed_debug);
@@ -2020,6 +2024,25 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
})
.run();
+ // test code that starts with indent
+ tst.test(
+ "\n"
+ "\n"
+ "\n"
+ " print(\"Hello, world!\")\n"
+ "\n"
+ "\n"
+ "")
+ .enable_thinking(false)
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .tools({
+ python_tool
+ })
+ .expect_tool_calls({
+ { "python", "{\"code\": \" print(\\\"Hello, world!\\\")\"}", {} },
+ })
+ .run();
+
tst.test(
"I need to output the invoice details in JSON\n"
"\n\n"
@@ -3196,18 +3219,16 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
tst.test(
"\n"
"\n"
- "\n"
- "foo.cpp\n"
+ ""
+ "foo.cpp"
"\n"
""
"def foo(arg = \"14\"):\n"
" return arg + \"bar\"\n"
- "\n"
"\n"
""
"def foo(arg = \"15\"):\n"
" pass\n"
- "\n"
"\n"
"\n"
"")
@@ -4927,6 +4948,20 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).expect_reconstruction().run();
+ tst.test(
+ "```json\n\"42\" \n```")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .json_schema(const_schema)
+ .expect_content(R"("42")")
+ .run();
+
+ tst.test(
+ "\"42\" \n")
+ .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
+ .json_schema(const_schema)
+ .expect_content(R"("42")")
+ .run();
+
// Continuation tests
tst.test("world!\nWhat's up?")
.messages({ message_user, message_assist_prefill_content })