From 2b237c7a4100316de7843884d72af9c402e35e53 Mon Sep 17 00:00:00 2001 From: hoobnn <111053672+hoobnn@users.noreply.github.com> Date: Thu, 4 Jun 2026 04:27:45 +0800 Subject: [PATCH] [Bugfix] Honor tool_choice="none" in Chat Completions streaming (#42752) Signed-off-by: hoobnn <111053672+hoobnn@users.noreply.github.com> Signed-off-by: sfeng33 <4florafeng@gmail.com> Co-authored-by: sfeng33 <4florafeng@gmail.com> --- tests/parser/test_streaming.py | 37 ++++++++++++++++++++++++++++++++++ vllm/parser/abstract_parser.py | 3 +++ 2 files changed, 40 insertions(+) diff --git a/tests/parser/test_streaming.py b/tests/parser/test_streaming.py index 2ba2392f8e9..dbc64e75593 100644 --- a/tests/parser/test_streaming.py +++ b/tests/parser/test_streaming.py @@ -36,11 +36,24 @@ def tokenizer(): return get_tokenizer("Qwen/Qwen3-32B") +TOOLS = [ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": {"type": "object", "properties": {}}, + }, + } +] + + @pytest.fixture def request_obj(): return ChatCompletionRequest( model="test-model", messages=[{"role": "user", "content": "hi"}], + tools=TOOLS, + tool_choice="auto", ) @@ -328,3 +341,27 @@ def test_parse_delta_finished_appends_remaining_args(tokenizer, request_obj): tc.function.arguments for tc in tool_calls if tc.function.arguments ) assert tool_args.endswith(remainder) + + +def test_parse_delta_tool_choice_none(tokenizer, request_obj): + parser = make_parser(tokenizer, reasoning=False, tool=True) + request = request_obj.model_copy(update={"tool_choice": "none"}) + results = stream_text(parser, tokenizer, MODEL_OUTPUT, request, prompt_token_ids=[]) + reasoning, content, tool_calls = collect_fields(results) + + assert reasoning == "" + assert len(tool_calls) == 0 + assert "" in content + assert "get_weather" in content + + +def test_parse_delta_tool_choice_none_with_reasoning(tokenizer, request_obj): + parser = make_parser(tokenizer, reasoning=True, tool=True) + request = 
request_obj.model_copy(update={"tool_choice": "none"}) + results = stream_text(parser, tokenizer, MODEL_OUTPUT, request, prompt_token_ids=[]) + reasoning, content, tool_calls = collect_fields(results) + + assert "let me think about this" in reasoning + assert len(tool_calls) == 0 + assert "<tool_call>" in content + assert "get_weather" in content diff --git a/vllm/parser/abstract_parser.py b/vllm/parser/abstract_parser.py index 9e4d1830b4d..d5ea574bf76 100644 --- a/vllm/parser/abstract_parser.py +++ b/vllm/parser/abstract_parser.py @@ -706,6 +706,9 @@ class DelegatingParser(Parser): tool_call_id_type: str = "random", function_name_returned: bool = False, ) -> tuple[DeltaMessage | None, bool]: + if request.tool_choice == "none": + return (DeltaMessage(content=delta_text) if delta_text else None), False + assert self._tool_parser is not None supports_required_and_named = self._tool_parser.supports_required_and_named if (