[Bugfix] Handle real-world gpt-oss tool call output in Harmony parsing (#42454)

Signed-off-by: Ben Browning <bbrownin@redhat.com>
2026-06-06 00:16:14 +00:00 · 2026-05-13 13:54:46 -04:00
parent b3c69595a6
commit 0f69128a37
12 changed files with 801 additions and 50 deletions
@@ -199,12 +199,107 @@ class TestExtractHarmonyStreamingDelta:
        assert delta_message.content == delta_text
        assert tools_streamed is False

+    @pytest.mark.parametrize("channel", ["commentary", "analysis"])
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
+    def test_new_tool_call_without_functions_prefix(
+        self, mock_make_tool_call_id, channel
+    ):
+        mock_make_tool_call_id.return_value = "call_bare123"
+        parser = MockStreamableParser()
+
+        token_states = [TokenState(channel=channel, recipient="get_weather", text="")]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient=None,
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert len(delta_message.tool_calls) == 1
+        tool_call = delta_message.tool_calls[0]
+        assert tool_call.id == "call_bare123"
+        assert tool_call.type == "function"
+        assert tool_call.function.name == "get_weather"
+        assert tool_call.function.arguments == ""
+        assert tool_call.index == 0
+        assert tools_streamed is True
+
+    @pytest.mark.parametrize("channel", ["commentary", "analysis"])
+    def test_tool_call_argument_streaming_without_functions_prefix(self, channel):
+        parser = MockStreamableParser()
+        args_text = '{"location": "Paris"}'
+
+        token_states = [
+            TokenState(channel=channel, recipient="get_weather", text=args_text)
+        ]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient="get_weather",
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        tool_call = delta_message.tool_calls[0]
+        assert tool_call.id is None
+        assert tool_call.function.arguments == args_text
+        assert tool_call.index == 0
+        assert tools_streamed is True
+
+    def test_tool_call_index_from_previous_messages_without_functions_prefix(self):
+        messages = [
+            MockMessage(channel="commentary", recipient="tool1"),
+        ]
+        parser = MockStreamableParser(messages=messages)
+
+        token_states = [
+            TokenState(channel="commentary", recipient="tool2", text="args")
+        ]
+
+        delta_message, _ = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient="tool2",
+            include_reasoning=False,
+        )
+
+        assert delta_message.tool_calls[0].index == 1
+
+    @pytest.mark.parametrize("channel", ["commentary", "analysis"])
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
+    def test_new_tool_call_dotted_function_name(self, mock_make_tool_call_id, channel):
+        mock_make_tool_call_id.return_value = "call_dotted123"
+        parser = MockStreamableParser()
+
+        token_states = [TokenState(channel=channel, recipient="math.sum", text="")]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient=None,
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert len(delta_message.tool_calls) == 1
+        tool_call = delta_message.tool_calls[0]
+        assert tool_call.id == "call_dotted123"
+        assert tool_call.type == "function"
+        assert tool_call.function.name == "math.sum"
+        assert tool_call.function.arguments == ""
+        assert tool_call.index == 0
+        assert tools_streamed is True
+
    @pytest.mark.parametrize(
        "channel,recipient",
        [
            (None, None),
            ("unknown_channel", None),
            ("commentary", "browser.search"),
+            ("commentary", "assistant"),
        ],
    )
    def test_returns_none_for_invalid_inputs(self, channel, recipient):
@@ -348,3 +443,92 @@ class TestExtractHarmonyStreamingDelta:
        assert tool_c_args.function.arguments == '{"key_c": "val_c"}'

        assert delta_message.content == "Thinking... Thinking again..."
+
+
+class TestToolCallsOnNonStandardChannels:
+    """Tool calls are detected by recipient, not channel.
+
+    Models sometimes emit tool calls on unexpected channels (e.g. ``comment``
+    instead of ``commentary``).  These tests verify that the streaming delta
+    extraction is channel-agnostic for tool call detection.
+    """
+
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
+    def test_prefixed_tool_call_on_comment_channel(self, mock_make_tool_call_id):
+        mock_make_tool_call_id.return_value = "call_comment_chan"
+        parser = MockStreamableParser()
+
+        token_states = [
+            TokenState(channel="comment", recipient="functions.get_weather", text="")
+        ]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient=None,
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert len(delta_message.tool_calls) == 1
+        assert delta_message.tool_calls[0].function.name == "get_weather"
+        assert tools_streamed is True
+
+    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
+    def test_bare_tool_call_on_comment_channel(self, mock_make_tool_call_id):
+        mock_make_tool_call_id.return_value = "call_bare_comment"
+        parser = MockStreamableParser()
+
+        token_states = [TokenState(channel="comment", recipient="get_weather", text="")]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient=None,
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert len(delta_message.tool_calls) == 1
+        assert delta_message.tool_calls[0].function.name == "get_weather"
+        assert tools_streamed is True
+
+    def test_tool_call_arguments_on_comment_channel(self):
+        parser = MockStreamableParser()
+        args_text = '{"location": "Paris"}'
+
+        token_states = [
+            TokenState(
+                channel="comment", recipient="functions.get_weather", text=args_text
+            )
+        ]
+
+        delta_message, tools_streamed = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient="functions.get_weather",
+            include_reasoning=False,
+        )
+
+        assert delta_message is not None
+        assert delta_message.tool_calls[0].function.arguments == args_text
+        assert tools_streamed is True
+
+    def test_base_index_counts_tool_calls_on_comment_channel(self):
+        messages = [
+            MockMessage(channel="comment", recipient="functions.tool1"),
+        ]
+        parser = MockStreamableParser(messages=messages)
+
+        token_states = [
+            TokenState(channel="commentary", recipient="functions.tool2", text="args")
+        ]
+
+        delta_message, _ = extract_harmony_streaming_delta(
+            harmony_parser=parser,
+            token_states=token_states,
+            prev_recipient="functions.tool2",
+            include_reasoning=False,
+        )
+
+        assert delta_message.tool_calls[0].index == 1
@@ -7,9 +7,11 @@ from openai_harmony import Message, Role
 from tests.entrypoints.openai.utils import verify_harmony_messages
 from vllm.entrypoints.openai.parser.harmony_utils import (
    auto_drop_analysis_messages,
+    extract_function_from_recipient,
    get_encoding,
    get_system_message,
    has_custom_tools,
+    is_function_recipient,
    parse_chat_input_to_harmony_message,
    parse_chat_output,
 )
@@ -19,6 +21,182 @@ from vllm.entrypoints.openai.responses.harmony import (
 )


+class TestIsFunctionRecipient:
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "functions.get_weather",
+            "functions.search_web",
+            "functions.math.sum",
+        ],
+    )
+    def test_functions_prefix_accepted(self, recipient):
+        assert is_function_recipient(recipient) is True
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "get_weather",
+            "search_web",
+            "calculator",
+            "my-tool",
+        ],
+    )
+    def test_bare_function_name_accepted(self, recipient):
+        assert is_function_recipient(recipient) is True
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "assistant",
+        ],
+    )
+    def test_assistant_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "math.sum",
+            "code.run",
+            "namespace.tool_name",
+            "my.deeply.nested.tool",
+        ],
+    )
+    def test_dotted_function_names_accepted(self, recipient):
+        assert is_function_recipient(recipient) is True
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "python",
+            "browser",
+            "container",
+        ],
+    )
+    def test_builtin_tool_names_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "python.run",
+            "python.execute",
+            "browser.search",
+            "browser.open",
+            "container.exec",
+        ],
+    )
+    def test_builtin_dotted_variants_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "",
+            "functions.",
+        ],
+    )
+    def test_empty_recipients_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "<|start|>",
+            "<|end|>",
+            "<|channel|>",
+        ],
+    )
+    def test_harmony_tokens_rejected(self, recipient):
+        assert is_function_recipient(recipient) is False
+
+
+class TestIsFunctionRecipientWithAllowedNames:
+    """Tests for is_function_recipient with allowed_function_tool_names."""
+
+    def test_prefixed_always_accepted(self):
+        """functions. prefix is always accepted regardless of allowed names."""
+        fn_names = frozenset({"other_tool"})
+        assert is_function_recipient("functions.get_weather", fn_names) is True
+
+    def test_bare_name_accepted_when_in_allowed_names(self):
+        fn_names = frozenset({"get_weather", "search_web"})
+        assert is_function_recipient("get_weather", fn_names) is True
+        assert is_function_recipient("search_web", fn_names) is True
+
+    def test_bare_name_rejected_when_not_in_allowed_names(self):
+        fn_names = frozenset({"get_weather"})
+        assert is_function_recipient("unknown_tool", fn_names) is False
+
+    def test_dotted_name_accepted_when_in_allowed_names(self):
+        fn_names = frozenset({"math.sum", "namespace.tool_name"})
+        assert is_function_recipient("math.sum", fn_names) is True
+        assert is_function_recipient("namespace.tool_name", fn_names) is True
+
+    def test_dotted_name_rejected_when_not_in_allowed_names(self):
+        fn_names = frozenset({"get_weather"})
+        assert is_function_recipient("custom_server.search", fn_names) is False
+
+    def test_empty_allowed_names_rejects_bare_names(self):
+        """Empty frozenset means no function tools — bare names are not functions."""
+        fn_names: frozenset[str] = frozenset()
+        assert is_function_recipient("get_weather", fn_names) is False
+        assert is_function_recipient("math.sum", fn_names) is False
+
+    def test_builtin_tools_always_rejected(self):
+        fn_names = frozenset({"python", "browser", "container"})
+        assert is_function_recipient("python", fn_names) is False
+        assert is_function_recipient("browser", fn_names) is False
+        assert is_function_recipient("container", fn_names) is False
+
+    def test_builtin_dotted_always_rejected(self):
+        fn_names = frozenset({"python.run", "browser.search"})
+        assert is_function_recipient("python.run", fn_names) is False
+        assert is_function_recipient("browser.search", fn_names) is False
+
+    def test_none_allowed_names_uses_heuristic(self):
+        """When allowed names is None (Chat Completions), use heuristic."""
+        assert is_function_recipient("get_weather", None) is True
+        assert is_function_recipient("math.sum", None) is True
+        assert is_function_recipient("python", None) is False
+
+
+class TestExtractFunctionFromRecipient:
+    @pytest.mark.parametrize(
+        "recipient,expected",
+        [
+            ("functions.get_weather", "get_weather"),
+            ("functions.search_web", "search_web"),
+            ("functions.", ""),
+        ],
+    )
+    def test_strips_functions_prefix(self, recipient, expected):
+        assert extract_function_from_recipient(recipient) == expected
+
+    @pytest.mark.parametrize(
+        "recipient",
+        [
+            "get_weather",
+            "calculator",
+            "my-tool",
+        ],
+    )
+    def test_bare_name_returned_as_is(self, recipient):
+        assert extract_function_from_recipient(recipient) == recipient
+
+    @pytest.mark.parametrize(
+        "recipient,expected",
+        [
+            ("functions.math.sum", "math.sum"),
+            ("math.sum", "math.sum"),
+            ("namespace.tool_name", "namespace.tool_name"),
+        ],
+    )
+    def test_dotted_function_name_extraction(self, recipient, expected):
+        assert extract_function_from_recipient(recipient) == expected
+
+
 class TestCommonParseInputToHarmonyMessage:
    """
    Tests for scenarios that are common to both Chat Completion
@@ -246,7 +246,8 @@ class TestHarmonyToResponseOutput:
        message = message.with_channel("commentary")
        message = message.with_recipient("custom_tool")

-        output_items = harmony_to_response_output(message)
+        fn_names = frozenset({"other_tool"})
+        output_items = harmony_to_response_output(message, fn_names)

        assert len(output_items) == 1
        assert isinstance(output_items[0], McpCall)
@@ -286,13 +287,179 @@ class TestHarmonyToResponseOutput:
        assert len(output_items) == 0


+class TestHarmonyToResponseOutputWithFunctionToolNames:
+    """Tests for bare function name handling with function_tool_names."""
+
+    def test_bare_name_creates_function_call_when_in_tool_names(self):
+        """Bare function name matching a known tool creates function call."""
+        message = Message.from_role_and_content(
+            Role.ASSISTANT, '{"location": "San Francisco"}'
+        )
+        message = message.with_channel("commentary")
+        message = message.with_recipient("get_weather")
+
+        fn_names = frozenset({"get_weather"})
+        output_items = harmony_to_response_output(message, fn_names)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].type == "function_call"
+        assert output_items[0].name == "get_weather"
+        assert output_items[0].arguments == '{"location": "San Francisco"}'
+
+    def test_bare_name_creates_mcp_call_when_not_in_tool_names(self):
+        """Bare name not matching any known tool creates MCP call."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("custom_tool")
+
+        fn_names = frozenset({"get_weather"})
+        output_items = harmony_to_response_output(message, fn_names)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], McpCall)
+        assert output_items[0].type == "mcp_call"
+
+    def test_dotted_function_name_creates_function_call(self):
+        """Dotted function name in tool names creates function call."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"a": 1, "b": 2}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("math.sum")
+
+        fn_names = frozenset({"math.sum"})
+        output_items = harmony_to_response_output(message, fn_names)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].name == "math.sum"
+
+    def test_empty_tool_names_defaults_to_mcp(self):
+        """With empty function_tool_names, bare names become MCP calls."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("get_weather")
+
+        output_items = harmony_to_response_output(message, frozenset())
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], McpCall)
+
+    def test_prefixed_name_always_function_call(self):
+        """functions. prefix always creates function call even with empty tool names."""
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
+        message = message.with_channel("commentary")
+        message = message.with_recipient("functions.get_weather")
+
+        output_items = harmony_to_response_output(message, frozenset())
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].name == "get_weather"
+
+
+class TestParserStateWithFunctionToolNames:
+    """Tests for parser_state_to_response_output with function_tool_names."""
+
+    def test_bare_name_creates_function_call(self):
+        from unittest.mock import Mock
+
+        parser = Mock()
+        parser.current_content = '{"arg": "value"}'
+        parser.current_role = Role.ASSISTANT
+        parser.current_channel = "commentary"
+        parser.current_recipient = "get_weather"
+
+        fn_names = frozenset({"get_weather"})
+        items = parser_state_to_response_output(parser, fn_names)
+
+        assert len(items) == 1
+        assert isinstance(items[0], ResponseFunctionToolCall)
+        assert items[0].name == "get_weather"
+        assert items[0].status == "in_progress"
+
+    def test_bare_name_creates_mcp_when_not_in_tool_names(self):
+        from unittest.mock import Mock
+
+        parser = Mock()
+        parser.current_content = '{"arg": "value"}'
+        parser.current_role = Role.ASSISTANT
+        parser.current_channel = "commentary"
+        parser.current_recipient = "unknown_tool"
+
+        fn_names = frozenset({"get_weather"})
+        items = parser_state_to_response_output(parser, fn_names)
+
+        assert len(items) == 1
+        assert isinstance(items[0], McpCall)
+        assert items[0].name == "unknown_tool"
+
+
+class TestToolCallsOnNonStandardChannels:
+    """Tests verifying tool calls are detected regardless of channel."""
+
+    def test_function_call_on_comment_channel(self):
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"query": "weather"}')
+        message = message.with_channel("comment")
+        message = message.with_recipient("functions.get_weather")
+
+        output_items = harmony_to_response_output(message)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].type == "function_call"
+        assert output_items[0].name == "get_weather"
+
+    def test_bare_function_on_comment_channel(self):
+        message = Message.from_role_and_content(Role.ASSISTANT, '{"query": "weather"}')
+        message = message.with_channel("comment")
+        message = message.with_recipient("get_weather")
+
+        fn_names = frozenset({"get_weather"})
+        output_items = harmony_to_response_output(message, fn_names)
+
+        assert len(output_items) == 1
+        assert isinstance(output_items[0], ResponseFunctionToolCall)
+        assert output_items[0].name == "get_weather"
+
+    def test_parser_state_comment_channel_function(self):
+        from unittest.mock import Mock
+
+        parser = Mock()
+        parser.current_content = '{"arg": "value"}'
+        parser.current_role = Role.ASSISTANT
+        parser.current_channel = "comment"
+        parser.current_recipient = "functions.get_weather"
+
+        items = parser_state_to_response_output(parser)
+
+        assert len(items) == 1
+        assert isinstance(items[0], ResponseFunctionToolCall)
+        assert items[0].name == "get_weather"
+
+    def test_parser_state_comment_channel_mcp(self):
+        from unittest.mock import Mock
+
+        parser = Mock()
+        parser.current_content = '{"arg": "value"}'
+        parser.current_role = Role.ASSISTANT
+        parser.current_channel = "comment"
+        parser.current_recipient = "mcp.server.tool"
+
+        fn_names: frozenset[str] = frozenset()
+        items = parser_state_to_response_output(parser, fn_names)
+
+        assert len(items) == 1
+        assert isinstance(items[0], McpCall)
+
+
 def test_parse_mcp_call_basic() -> None:
    """Test that MCP calls are parsed with correct type and server_label."""
    message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
    message = message.with_recipient("filesystem")
    message = message.with_channel("commentary")

-    output_items = harmony_to_response_output(message)
+    fn_names: frozenset[str] = frozenset()
+    output_items = harmony_to_response_output(message, fn_names)

    assert len(output_items) == 1
    assert isinstance(output_items[0], McpCall)
@@ -309,7 +476,8 @@ def test_parse_mcp_call_dotted_recipient() -> None:
    message = message.with_recipient("repo_browser.list")
    message = message.with_channel("commentary")

-    output_items = harmony_to_response_output(message)
+    fn_names: frozenset[str] = frozenset()
+    output_items = harmony_to_response_output(message, fn_names)

    assert len(output_items) == 1
    assert isinstance(output_items[0], McpCall)
@@ -371,7 +539,8 @@ def test_parser_state_to_response_output_commentary_channel() -> None:
    parser_mcp.current_channel = "commentary"
    parser_mcp.current_recipient = "filesystem"

-    mcp_items = parser_state_to_response_output(parser_mcp)
+    fn_names: frozenset[str] = frozenset()
+    mcp_items = parser_state_to_response_output(parser_mcp, fn_names)

    assert len(mcp_items) == 1
    assert isinstance(mcp_items[0], McpCall)
@@ -438,7 +607,8 @@ def test_parser_state_to_response_output_analysis_channel() -> None:
    parser_mcp.current_channel = "analysis"
    parser_mcp.current_recipient = "database"

-    mcp_items = parser_state_to_response_output(parser_mcp)
+    fn_names: frozenset[str] = frozenset()
+    mcp_items = parser_state_to_response_output(parser_mcp, fn_names)

    assert len(mcp_items) == 1
    assert isinstance(mcp_items[0], McpCall)
@@ -217,6 +217,158 @@ def test_extract_tool_calls_multiple_tools(
    assert extracted_info.content is None


+def test_extract_tool_calls_bare_function_name(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(Role.USER, "What is the weather in Tokyo?"),
+            Message.from_role_and_content(
+                Role.ASSISTANT,
+                "We need to use get_current_weather tool.",
+            ).with_channel("analysis"),
+            Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
+            .with_channel("commentary")
+            .with_recipient("get_current_weather")
+            .with_content_type("json"),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT
+    )
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert extracted_info.tools_called
+    expected_tool_calls = [
+        ToolCall(
+            function=FunctionCall(
+                name="get_current_weather",
+                arguments=json.dumps({"location": "Tokyo"}),
+            )
+        )
+    ]
+    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
+    assert extracted_info.content is None
+
+
+def test_extract_tool_calls_bare_function_name_multiple(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(
+                Role.USER, "What is the weather in Tokyo based on where I'm at?"
+            ),
+            Message.from_role_and_content(
+                Role.ASSISTANT,
+                "We need to use both tools.",
+            ).with_channel("analysis"),
+            Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
+            .with_channel("commentary")
+            .with_recipient("get_current_weather")
+            .with_content_type("json"),
+            Message.from_role_and_content(Role.ASSISTANT, "{}")
+            .with_channel("commentary")
+            .with_recipient("get_user_location")
+            .with_content_type("json"),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT
+    )
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert extracted_info.tools_called
+    expected_tool_calls = [
+        ToolCall(
+            function=FunctionCall(
+                name="get_current_weather",
+                arguments=json.dumps({"location": "Tokyo"}),
+            )
+        ),
+        ToolCall(
+            function=FunctionCall(
+                name="get_user_location",
+                arguments=json.dumps({}),
+            )
+        ),
+    ]
+    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
+
+
+def test_extract_tool_calls_assistant_recipient_ignored(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(Role.USER, "Hello"),
+            Message.from_role_and_content(Role.ASSISTANT, "Some tool response")
+            .with_channel("commentary")
+            .with_recipient("assistant"),
+            Message.from_role_and_content(
+                Role.ASSISTANT, "Here is the answer"
+            ).with_channel("final"),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT
+    )
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert not extracted_info.tools_called
+    assert extracted_info.tool_calls == []
+    assert extracted_info.content == "Here is the answer"
+
+
+def test_extract_tool_calls_dotted_function_name(
+    openai_tool_parser,
+    harmony_encoding,
+):
+    convo = Conversation.from_messages(
+        [
+            Message.from_role_and_content(Role.USER, "Compute 2+3"),
+            Message.from_role_and_content(Role.ASSISTANT, '{"a": 2, "b": 3}')
+            .with_channel("commentary")
+            .with_recipient("math.sum")
+            .with_content_type("json"),
+        ]
+    )
+    token_ids = harmony_encoding.render_conversation_for_completion(
+        convo, Role.ASSISTANT
+    )
+
+    extracted_info = openai_tool_parser.extract_tool_calls(
+        "",
+        request=None,
+        token_ids=token_ids,
+    )
+    assert extracted_info.tools_called
+    expected_tool_calls = [
+        ToolCall(
+            function=FunctionCall(
+                name="math.sum",
+                arguments=json.dumps({"a": 2, "b": 3}),
+            )
+        )
+    ]
+    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
+
+
 def test_extract_tool_calls_with_content(
    openai_tool_parser,
    harmony_encoding,
@@ -17,6 +17,10 @@ from vllm.entrypoints.openai.engine.protocol import (
    DeltaMessage,
    DeltaToolCall,
 )
+from vllm.entrypoints.openai.parser.harmony_utils import (
+    extract_function_from_recipient,
+    is_function_recipient,
+)


 class TokenState(NamedTuple):
@@ -79,16 +83,12 @@ def extract_harmony_streaming_delta(
    # This counts completed tool calls in messages
    base_index = 0
    for msg in harmony_parser.messages:
-        if (
-            (msg.channel == "commentary" or msg.channel == "analysis")
-            and msg.recipient
-            and msg.recipient.startswith("functions.")
-        ):
+        if msg.recipient and is_function_recipient(msg.recipient):
            base_index += 1

    # If there's an ongoing tool call from previous chunk,
    # the next new tool call starts at base_index + 1
-    if prev_recipient and prev_recipient.startswith("functions."):
+    if prev_recipient and is_function_recipient(prev_recipient):
        next_tool_index = base_index + 1
        # Ongoing call is at base_index
        ongoing_tool_index = base_index
@@ -101,15 +101,11 @@ def extract_harmony_streaming_delta(
        if group.channel == "final":
            combined_content += group.text
            content_encountered = True
-        elif (
-            (group.channel == "commentary" or group.channel == "analysis")
-            and group.recipient
-            and group.recipient.startswith("functions.")
-        ):
+        elif group.recipient and is_function_recipient(group.recipient):
            opened_new_call = False
            if prev_recipient != group.recipient:
                # New tool call - emit the opening message
-                tool_name = group.recipient.split("functions.", 1)[1]
+                tool_name = extract_function_from_recipient(group.recipient)
                tool_messages.append(
                    DeltaToolCall(
                        id=make_tool_call_id(),
@@ -26,6 +26,42 @@ from vllm.logger import init_logger

 logger = init_logger(__name__)

+
+def is_function_recipient(
+    recipient: str,
+    allowed_function_tool_names: frozenset[str] | None = None,
+) -> bool:
+    """Check whether *recipient* refers to a function tool call.
+
+    The optional *allowed_function_tool_names* parameter is used by the
+    Responses API to distinguish bare function-call recipients (missing the
+    ``functions.`` prefix) from MCP tool calls.  When provided, a bare
+    recipient is only treated as a function call if it appears in the set.
+    The Chat Completions path omits it (the heuristic fallback): any bare recipient
+    is accepted unless it is ``assistant``, a built-in tool, or a ``<|`` token.
+    """
+    if not recipient:
+        return False
+    if recipient.startswith("<|"):
+        return False
+    if recipient.startswith("functions."):
+        return len(recipient) > len("functions.")
+    if recipient == "assistant":
+        return False
+    if recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
+        return False
+    first_segment = recipient.split(".", 1)[0]
+    if first_segment in BUILTIN_TOOL_TO_MCP_SERVER_LABEL:
+        return False
+    if allowed_function_tool_names is not None:
+        return recipient in allowed_function_tool_names
+    return True
+
+
+def extract_function_from_recipient(recipient: str) -> str:
+    return recipient.removeprefix("functions.")
+
+
 REASONING_EFFORT = {
    "high": ReasoningEffort.HIGH,
    "medium": ReasoningEffort.MEDIUM,
@@ -524,10 +524,12 @@ class HarmonyContext(ConversationContext):
        self,
        messages: list,
        available_tools: list[str],
+        function_tool_names: frozenset[str] | None = None,
    ):
        self._messages = messages
        self.finish_reason: str | None = None
        self.available_tools = available_tools
+        self.function_tool_names = function_tool_names
        self._tool_sessions: dict[str, ClientSession | Tool] = {}
        self.called_tools: set[str] = set()

@@ -31,7 +31,9 @@ from openai_harmony import Author, Message, Role, StreamableParser, TextContent

 from vllm.entrypoints.openai.parser.harmony_utils import (
    BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
+    extract_function_from_recipient,
    flatten_chat_text_content,
+    is_function_recipient,
 )
 from vllm.entrypoints.openai.responses.protocol import (
    ResponseInputOutputItem,
@@ -294,7 +296,7 @@ def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutput

 def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
    """Parse function calls into function tool call items."""
-    function_name = recipient.split(".")[-1]
+    function_name = extract_function_from_recipient(recipient)
    output_items = []
    for content in message.content:
        random_id = random_uuid()
@@ -410,7 +412,10 @@ def _parse_message_no_recipient(
 # ---------------------------------------------------------------------------


-def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
+def harmony_to_response_output(
+    message: Message,
+    function_tool_names: frozenset[str] | None = None,
+) -> list[ResponseOutputItem]:
    """Parse a Harmony message into a list of output response items.

    This is the main dispatcher that routes based on channel and recipient.
@@ -429,8 +434,8 @@ def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
        if recipient.startswith("browser."):
            output_items.append(_parse_browser_tool_call(message, recipient))

-        # Function calls (should only happen on commentary channel)
-        elif message.channel == "commentary" and recipient.startswith("functions."):
+        # Function calls (with or without "functions." prefix)
+        elif is_function_recipient(recipient, function_tool_names):
            output_items.extend(_parse_function_call(message, recipient))

        # Built-in MCP tools (python, browser, container)
@@ -450,6 +455,7 @@ def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:

 def parser_state_to_response_output(
    parser: StreamableParser,
+    function_tool_names: frozenset[str] | None = None,
 ) -> list[ResponseOutputItem]:
    """Extract in-progress response items from incomplete parser state.

@@ -464,15 +470,15 @@ def parser_state_to_response_output(
    if current_recipient is not None and current_recipient.startswith("browser."):
        return []

-    if current_recipient and parser.current_channel in ("commentary", "analysis"):
-        if current_recipient.startswith("functions."):
+    if current_recipient:
+        if is_function_recipient(current_recipient, function_tool_names):
            rid = random_uuid()
            return [
                ResponseFunctionToolCall(
                    arguments=parser.current_content,
                    call_id=f"call_{rid}",
                    type="function_call",
-                    name=current_recipient.split(".")[-1],
+                    name=extract_function_from_recipient(current_recipient),
                    id=f"fc_{rid}",
                    status="in_progress",
                )
@@ -89,6 +89,7 @@ from vllm.entrypoints.openai.responses.streaming_events import (
 from vllm.entrypoints.openai.responses.utils import (
    construct_input_messages,
    construct_tool_dicts,
+    extract_function_tool_names,
    extract_tool_types,
 )
 from vllm.entrypoints.serve.render.serving import OpenAIServingRender
@@ -449,11 +450,16 @@ class OpenAIServingResponses(OpenAIServing):
            )

            context: ConversationContext
+            function_tool_names = extract_function_tool_names(request.tools)
            if self.use_harmony:
                if request.stream:
-                    context = StreamingHarmonyContext(messages, available_tools)
+                    context = StreamingHarmonyContext(
+                        messages, available_tools, function_tool_names
+                    )
                else:
-                    context = HarmonyContext(messages, available_tools)
+                    context = HarmonyContext(
+                        messages, available_tools, function_tool_names
+                    )
            else:
                if envs.VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT:
                    # This is a feature in development for parsing
@@ -1070,10 +1076,11 @@ class OpenAIServingResponses(OpenAIServing):
    ) -> list[ResponseOutputItem]:
        output_items: list[ResponseOutputItem] = []
        num_init_messages = context.num_init_messages
+        fn_names = context.function_tool_names
        for msg in context.messages[num_init_messages:]:
-            output_items.extend(harmony_to_response_output(msg))
+            output_items.extend(harmony_to_response_output(msg, fn_names))
        # Handle the generation stopped in the middle (if any).
-        last_items = parser_state_to_response_output(context.parser)
+        last_items = parser_state_to_response_output(context.parser, fn_names)
        if last_items:
            output_items.extend(last_items)
        return output_items
@@ -1448,7 +1455,9 @@ class OpenAIServingResponses(OpenAIServing):
            if ctx.is_expecting_start():
                if len(ctx.parser.messages) > 0:
                    previous_item = ctx.parser.messages[-1]
-                    for event in emit_previous_item_done_events(previous_item, state):
+                    for event in emit_previous_item_done_events(
+                        previous_item, state, ctx.function_tool_names
+                    ):
                        yield _increment_sequence_number_and_return(event)
                state.reset_for_new_item()

@@ -62,6 +62,10 @@ from openai_harmony import Message as HarmonyMessage

 from vllm.entrypoints.mcp.tool_server import ToolServer
 from vllm.entrypoints.openai.engine.protocol import DeltaMessage
+from vllm.entrypoints.openai.parser.harmony_utils import (
+    extract_function_from_recipient,
+    is_function_recipient,
+)
 from vllm.entrypoints.openai.responses.context import StreamingHarmonyContext
 from vllm.entrypoints.openai.responses.protocol import (
    ResponseReasoningPartAddedEvent,
@@ -111,19 +115,19 @@ class StreamingState:
        self.current_call_id = ""


-def is_mcp_tool_by_namespace(recipient: str | None) -> bool:
+def is_mcp_tool_by_namespace(
+    recipient: str | None,
+    allowed_function_tool_names: frozenset[str] | None = None,
+) -> bool:
    """
    Determine if a tool call is an MCP tool based on recipient prefix.

-    - Tools starting with "functions." are function calls
-    - Everything else is an MCP tool
+    Inverse of :func:`is_function_recipient`: any non-``None`` recipient
+    that is not a function call is an MCP tool; ``None`` is never one.
    """
    if recipient is None:
        return False
-
-    # Function calls have "functions." prefix
-    # Everything else is an MCP tool
-    return not recipient.startswith("functions.")
+    return not is_function_recipient(recipient, allowed_function_tool_names)


 # =====================================================================
@@ -575,16 +579,16 @@ def emit_content_delta_events(
        return emit_text_delta_events(delta, state)
    elif channel == "analysis" and recipient is None:
        return emit_reasoning_delta_events(delta, state)
-    # built-in tools will be triggered on the analysis channel
-    # However, occasionally built-in tools will
-    # still be output to commentary.
-    elif channel in ("commentary", "analysis") and recipient is not None:
-        if recipient.startswith("functions."):
-            function_name = recipient[len("functions.") :]
+    elif recipient is not None:
+        fn_names = ctx.function_tool_names
+        if is_function_recipient(recipient, fn_names):
+            function_name = extract_function_from_recipient(recipient)
            return emit_function_call_delta_events(delta, function_name, state)
        elif recipient == "python":
            return emit_code_interpreter_delta_events(delta, state)
-        elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(recipient):
+        elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(
+            recipient, fn_names
+        ):
            return emit_mcp_delta_events(delta, state, recipient)

    return []
@@ -593,6 +597,7 @@ def emit_content_delta_events(
 def emit_previous_item_done_events(
    previous_item: HarmonyMessage,
    state: StreamingState,
+    function_tool_names: frozenset[str] | None = None,
 ) -> list[StreamingResponsesResponse]:
    """Emit done events for the previous item when expecting a new start.

@@ -602,13 +607,13 @@ def emit_previous_item_done_events(
    text = previous_item.content[0].text
    if previous_item.recipient is not None:
        # Deal with tool call
-        if previous_item.recipient.startswith("functions."):
-            function_name = previous_item.recipient[len("functions.") :]
+        if is_function_recipient(previous_item.recipient, function_tool_names):
+            function_name = extract_function_from_recipient(previous_item.recipient)
            return emit_function_call_done_events(function_name, text, state)
        elif previous_item.recipient == "python":
            return emit_code_interpreter_completion_events(previous_item, state)
        elif (
-            is_mcp_tool_by_namespace(previous_item.recipient)
+            is_mcp_tool_by_namespace(previous_item.recipient, function_tool_names)
            and state.current_item_id is not None
            and state.current_item_id.startswith("mcp_")
        ):
@@ -792,9 +797,12 @@ def emit_tool_action_events(
        and state.sent_output_item_added
    ):
        recipient = previous_item.recipient
+        fn_names = ctx.function_tool_names
        if recipient == "python":
            events.extend(emit_code_interpreter_completion_events(previous_item, state))
-        elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(recipient):
+        elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(
+            recipient, fn_names
+        ):
            events.extend(
                emit_mcp_completion_events(
                    recipient, previous_item.content[0].text, state
@@ -236,6 +236,10 @@ def _construct_message_from_response_item(
    return item  # type: ignore[arg-type]


+def extract_function_tool_names(tools: list[Tool]) -> frozenset[str]:
+    """Collect the names of the function-type tools in *tools*.
+
+    Only entries whose ``type`` equals ``"function"`` contribute their
+    ``name``; the result is an immutable set.
+    """
+    names = {tool.name for tool in tools if tool.type == "function"}
+    return frozenset(names)
+
+
 def extract_tool_types(tools: list[Tool]) -> set[str]:
    """
    Extracts the tool types from the given tools.
@@ -13,7 +13,11 @@ from vllm.entrypoints.openai.engine.protocol import (
    FunctionCall,
    ToolCall,
 )
-from vllm.entrypoints.openai.parser.harmony_utils import parse_output_into_messages
+from vllm.entrypoints.openai.parser.harmony_utils import (
+    extract_function_from_recipient,
+    is_function_recipient,
+    parse_output_into_messages,
+)
 from vllm.logger import init_logger
 from vllm.tool_parsers.abstract_tool_parser import (
    Tool,
@@ -50,10 +54,12 @@ class OpenAIToolParser(ToolParser):

        if len(parser.messages) > 0:
            for msg in parser.messages:
+                if msg.author.role != "assistant":
+                    continue
                if len(msg.content) < 1:
                    continue
                msg_text = msg.content[0].text
-                if msg.recipient and msg.recipient.startswith("functions."):
+                if msg.recipient and is_function_recipient(msg.recipient):
                    # If no content-type is given assume JSON, as that's the
                    # most common case with gpt-oss models.
                    if not msg.content_type or "json" in msg.content_type:
@@ -72,7 +78,7 @@ class OpenAIToolParser(ToolParser):
                        ToolCall(
                            type="function",
                            function=FunctionCall(
-                                name=msg.recipient.split("functions.")[1],
+                                name=extract_function_from_recipient(msg.recipient),
                                arguments=tool_args,
                            ),
                        )