[Bugfix] Handle real-world gpt-oss tool call output in Harmony parsing (#42454)

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
Ben Browning
2026-05-13 13:54:46 -04:00
committed by GitHub
parent b3c69595a6
commit 0f69128a37
12 changed files with 801 additions and 50 deletions
@@ -199,12 +199,107 @@ class TestExtractHarmonyStreamingDelta:
assert delta_message.content == delta_text
assert tools_streamed is False
@pytest.mark.parametrize("channel", ["commentary", "analysis"])
@patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
def test_new_tool_call_without_functions_prefix(
    self, mock_make_tool_call_id, channel
):
    """A bare recipient (no ``functions.`` prefix) opens a new tool call.

    With no previous recipient, the delta must carry the opening message:
    the generated call id, the function name, and empty arguments.
    """
    # ``patch`` is the innermost decorator, so its mock is the first
    # argument after ``self``; ``channel`` comes from parametrize.
    mock_make_tool_call_id.return_value = "call_bare123"
    parser = MockStreamableParser()
    token_states = [TokenState(channel=channel, recipient="get_weather", text="")]

    delta_message, tools_streamed = extract_harmony_streaming_delta(
        harmony_parser=parser,
        token_states=token_states,
        prev_recipient=None,
        include_reasoning=False,
    )

    assert delta_message is not None
    assert len(delta_message.tool_calls) == 1
    tool_call = delta_message.tool_calls[0]
    assert tool_call.id == "call_bare123"
    assert tool_call.type == "function"
    assert tool_call.function.name == "get_weather"
    assert tool_call.function.arguments == ""
    assert tool_call.index == 0
    assert tools_streamed is True
@pytest.mark.parametrize("channel", ["commentary", "analysis"])
def test_tool_call_argument_streaming_without_functions_prefix(self, channel):
    """Argument text of an ongoing bare-recipient call is streamed as-is.

    ``prev_recipient`` equals the current recipient, so no new call is
    opened: the delta has no id and only carries the argument text.
    """
    parser = MockStreamableParser()
    args_text = '{"location": "Paris"}'
    token_states = [
        TokenState(channel=channel, recipient="get_weather", text=args_text)
    ]

    delta_message, tools_streamed = extract_harmony_streaming_delta(
        harmony_parser=parser,
        token_states=token_states,
        prev_recipient="get_weather",
        include_reasoning=False,
    )

    assert delta_message is not None
    tool_call = delta_message.tool_calls[0]
    assert tool_call.id is None
    assert tool_call.function.arguments == args_text
    assert tool_call.index == 0
    assert tools_streamed is True
def test_tool_call_index_from_previous_messages_without_functions_prefix(self):
    """A completed bare-recipient call in the parser shifts the index to 1.

    The parser already holds one finished message addressed to ``tool1``,
    so the ongoing call to ``tool2`` must be reported at index 1.
    """
    messages = [
        MockMessage(channel="commentary", recipient="tool1"),
    ]
    parser = MockStreamableParser(messages=messages)
    token_states = [
        TokenState(channel="commentary", recipient="tool2", text="args")
    ]

    delta_message, _ = extract_harmony_streaming_delta(
        harmony_parser=parser,
        token_states=token_states,
        prev_recipient="tool2",
        include_reasoning=False,
    )

    # Fail with a clear assertion rather than an AttributeError on None.
    assert delta_message is not None
    assert delta_message.tool_calls[0].index == 1
@pytest.mark.parametrize("channel", ["commentary", "analysis"])
@patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
def test_new_tool_call_dotted_function_name(self, mock_make_tool_call_id, channel):
    """A dotted bare recipient opens a call whose name keeps the dots.

    ``math.sum`` has no ``functions.`` prefix; the full dotted string must
    be reported as the function name.
    """
    mock_make_tool_call_id.return_value = "call_dotted123"
    parser = MockStreamableParser()
    token_states = [TokenState(channel=channel, recipient="math.sum", text="")]

    delta_message, tools_streamed = extract_harmony_streaming_delta(
        harmony_parser=parser,
        token_states=token_states,
        prev_recipient=None,
        include_reasoning=False,
    )

    assert delta_message is not None
    assert len(delta_message.tool_calls) == 1
    tool_call = delta_message.tool_calls[0]
    assert tool_call.id == "call_dotted123"
    assert tool_call.type == "function"
    assert tool_call.function.name == "math.sum"
    assert tool_call.function.arguments == ""
    assert tool_call.index == 0
    assert tools_streamed is True
@pytest.mark.parametrize(
"channel,recipient",
[
(None, None),
("unknown_channel", None),
("commentary", "browser.search"),
("commentary", "assistant"),
],
)
def test_returns_none_for_invalid_inputs(self, channel, recipient):
@@ -348,3 +443,92 @@ class TestExtractHarmonyStreamingDelta:
assert tool_c_args.function.arguments == '{"key_c": "val_c"}'
assert delta_message.content == "Thinking... Thinking again..."
class TestToolCallsOnNonStandardChannels:
    """Tool calls are detected by recipient, not channel.

    Models sometimes emit tool calls on unexpected channels (e.g. ``comment``
    instead of ``commentary``). These tests verify that the streaming delta
    extraction is channel-agnostic for tool call detection.
    """

    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
    def test_prefixed_tool_call_on_comment_channel(self, mock_make_tool_call_id):
        """A ``functions.``-prefixed recipient on ``comment`` opens a call."""
        mock_make_tool_call_id.return_value = "call_comment_chan"
        parser = MockStreamableParser()
        token_states = [
            TokenState(channel="comment", recipient="functions.get_weather", text="")
        ]

        delta_message, tools_streamed = extract_harmony_streaming_delta(
            harmony_parser=parser,
            token_states=token_states,
            prev_recipient=None,
            include_reasoning=False,
        )

        assert delta_message is not None
        assert len(delta_message.tool_calls) == 1
        assert delta_message.tool_calls[0].function.name == "get_weather"
        assert tools_streamed is True

    @patch("vllm.entrypoints.openai.chat_completion.stream_harmony.make_tool_call_id")
    def test_bare_tool_call_on_comment_channel(self, mock_make_tool_call_id):
        """A bare recipient on ``comment`` opens a call as well."""
        mock_make_tool_call_id.return_value = "call_bare_comment"
        parser = MockStreamableParser()
        token_states = [TokenState(channel="comment", recipient="get_weather", text="")]

        delta_message, tools_streamed = extract_harmony_streaming_delta(
            harmony_parser=parser,
            token_states=token_states,
            prev_recipient=None,
            include_reasoning=False,
        )

        assert delta_message is not None
        assert len(delta_message.tool_calls) == 1
        assert delta_message.tool_calls[0].function.name == "get_weather"
        assert tools_streamed is True

    def test_tool_call_arguments_on_comment_channel(self):
        """Argument text of an ongoing call on ``comment`` is streamed."""
        parser = MockStreamableParser()
        args_text = '{"location": "Paris"}'
        token_states = [
            TokenState(
                channel="comment", recipient="functions.get_weather", text=args_text
            )
        ]

        delta_message, tools_streamed = extract_harmony_streaming_delta(
            harmony_parser=parser,
            token_states=token_states,
            prev_recipient="functions.get_weather",
            include_reasoning=False,
        )

        assert delta_message is not None
        assert delta_message.tool_calls[0].function.arguments == args_text
        assert tools_streamed is True

    def test_base_index_counts_tool_calls_on_comment_channel(self):
        """A finished call on ``comment`` counts towards the tool index."""
        messages = [
            MockMessage(channel="comment", recipient="functions.tool1"),
        ]
        parser = MockStreamableParser(messages=messages)
        token_states = [
            TokenState(channel="commentary", recipient="functions.tool2", text="args")
        ]

        delta_message, _ = extract_harmony_streaming_delta(
            harmony_parser=parser,
            token_states=token_states,
            prev_recipient="functions.tool2",
            include_reasoning=False,
        )

        # Fail with a clear assertion rather than an AttributeError on None.
        assert delta_message is not None
        assert delta_message.tool_calls[0].index == 1
@@ -7,9 +7,11 @@ from openai_harmony import Message, Role
from tests.entrypoints.openai.utils import verify_harmony_messages
from vllm.entrypoints.openai.parser.harmony_utils import (
auto_drop_analysis_messages,
extract_function_from_recipient,
get_encoding,
get_system_message,
has_custom_tools,
is_function_recipient,
parse_chat_input_to_harmony_message,
parse_chat_output,
)
@@ -19,6 +21,182 @@ from vllm.entrypoints.openai.responses.harmony import (
)
class TestIsFunctionRecipient:
    """Behaviour of ``is_function_recipient`` without an allowed-name set."""

    @pytest.mark.parametrize(
        "recipient",
        [
            "functions.get_weather",
            "functions.search_web",
            "functions.math.sum",
        ],
    )
    def test_functions_prefix_accepted(self, recipient):
        """Recipients with a ``functions.`` prefix and a name are accepted."""
        assert is_function_recipient(recipient) is True

    @pytest.mark.parametrize(
        "recipient",
        [
            "get_weather",
            "search_web",
            "calculator",
            "my-tool",
        ],
    )
    def test_bare_function_name_accepted(self, recipient):
        """Bare names without any prefix are accepted."""
        assert is_function_recipient(recipient) is True

    @pytest.mark.parametrize(
        "recipient",
        [
            "assistant",
        ],
    )
    def test_assistant_rejected(self, recipient):
        """The ``assistant`` recipient is never a function call."""
        assert is_function_recipient(recipient) is False

    @pytest.mark.parametrize(
        "recipient",
        [
            "math.sum",
            "code.run",
            "namespace.tool_name",
            "my.deeply.nested.tool",
        ],
    )
    def test_dotted_function_names_accepted(self, recipient):
        """Dotted names outside the built-in namespaces are accepted."""
        assert is_function_recipient(recipient) is True

    @pytest.mark.parametrize(
        "recipient",
        [
            "python",
            "browser",
            "container",
        ],
    )
    def test_builtin_tool_names_rejected(self, recipient):
        """Built-in tool names are not function calls."""
        assert is_function_recipient(recipient) is False

    @pytest.mark.parametrize(
        "recipient",
        [
            "python.run",
            "python.execute",
            "browser.search",
            "browser.open",
            "container.exec",
        ],
    )
    def test_builtin_dotted_variants_rejected(self, recipient):
        """Names whose first segment is a built-in tool are rejected."""
        assert is_function_recipient(recipient) is False

    @pytest.mark.parametrize(
        "recipient",
        [
            "",
            "functions.",
        ],
    )
    def test_empty_recipients_rejected(self, recipient):
        """An empty string or a prefix with no name is rejected."""
        assert is_function_recipient(recipient) is False

    @pytest.mark.parametrize(
        "recipient",
        [
            "<|start|>",
            "<|end|>",
            "<|channel|>",
        ],
    )
    def test_harmony_tokens_rejected(self, recipient):
        """Recipients that look like ``<|...|>`` tokens are rejected."""
        assert is_function_recipient(recipient) is False
class TestIsFunctionRecipientWithAllowedNames:
    """Tests for is_function_recipient with allowed_function_tool_names."""

    def test_prefixed_always_accepted(self):
        """functions. prefix is always accepted regardless of allowed names."""
        fn_names = frozenset({"other_tool"})
        assert is_function_recipient("functions.get_weather", fn_names) is True

    def test_bare_name_accepted_when_in_allowed_names(self):
        """Bare names listed in the allowed set are function calls."""
        fn_names = frozenset({"get_weather", "search_web"})
        assert is_function_recipient("get_weather", fn_names) is True
        assert is_function_recipient("search_web", fn_names) is True

    def test_bare_name_rejected_when_not_in_allowed_names(self):
        """Bare names missing from the allowed set are rejected."""
        fn_names = frozenset({"get_weather"})
        assert is_function_recipient("unknown_tool", fn_names) is False

    def test_dotted_name_accepted_when_in_allowed_names(self):
        """Dotted names are matched against the allowed set as a whole."""
        fn_names = frozenset({"math.sum", "namespace.tool_name"})
        assert is_function_recipient("math.sum", fn_names) is True
        assert is_function_recipient("namespace.tool_name", fn_names) is True

    def test_dotted_name_rejected_when_not_in_allowed_names(self):
        """Dotted names missing from the allowed set are rejected."""
        fn_names = frozenset({"get_weather"})
        assert is_function_recipient("custom_server.search", fn_names) is False

    def test_empty_allowed_names_rejects_bare_names(self):
        """Empty frozenset means no function tools — bare names are not functions."""
        fn_names: frozenset[str] = frozenset()
        assert is_function_recipient("get_weather", fn_names) is False
        assert is_function_recipient("math.sum", fn_names) is False

    def test_builtin_tools_always_rejected(self):
        """Built-in tool names are rejected even when listed as allowed."""
        fn_names = frozenset({"python", "browser", "container"})
        assert is_function_recipient("python", fn_names) is False
        assert is_function_recipient("browser", fn_names) is False
        assert is_function_recipient("container", fn_names) is False

    def test_builtin_dotted_always_rejected(self):
        """Built-in namespaces are rejected even when listed as allowed."""
        fn_names = frozenset({"python.run", "browser.search"})
        assert is_function_recipient("python.run", fn_names) is False
        assert is_function_recipient("browser.search", fn_names) is False

    def test_none_allowed_names_uses_heuristic(self):
        """When allowed names is None (Chat Completions), use heuristic."""
        assert is_function_recipient("get_weather", None) is True
        assert is_function_recipient("math.sum", None) is True
        assert is_function_recipient("python", None) is False
class TestExtractFunctionFromRecipient:
    """Tests for extract_function_from_recipient."""

    @pytest.mark.parametrize(
        "recipient,expected",
        [
            ("functions.get_weather", "get_weather"),
            ("functions.search_web", "search_web"),
            ("functions.", ""),
        ],
    )
    def test_strips_functions_prefix(self, recipient, expected):
        """The ``functions.`` prefix is removed; a bare prefix yields ``""``."""
        assert extract_function_from_recipient(recipient) == expected

    @pytest.mark.parametrize(
        "recipient",
        [
            "get_weather",
            "calculator",
            "my-tool",
        ],
    )
    def test_bare_name_returned_as_is(self, recipient):
        """Recipients without the prefix come back unchanged."""
        assert extract_function_from_recipient(recipient) == recipient

    @pytest.mark.parametrize(
        "recipient,expected",
        [
            ("functions.math.sum", "math.sum"),
            ("math.sum", "math.sum"),
            ("namespace.tool_name", "namespace.tool_name"),
        ],
    )
    def test_dotted_function_name_extraction(self, recipient, expected):
        """Dots inside the function name are preserved."""
        assert extract_function_from_recipient(recipient) == expected
class TestCommonParseInputToHarmonyMessage:
"""
Tests for scenarios that are common to both Chat Completion
@@ -246,7 +246,8 @@ class TestHarmonyToResponseOutput:
message = message.with_channel("commentary")
message = message.with_recipient("custom_tool")
output_items = harmony_to_response_output(message)
fn_names = frozenset({"other_tool"})
output_items = harmony_to_response_output(message, fn_names)
assert len(output_items) == 1
assert isinstance(output_items[0], McpCall)
@@ -286,13 +287,179 @@ class TestHarmonyToResponseOutput:
assert len(output_items) == 0
class TestHarmonyToResponseOutputWithFunctionToolNames:
    """Tests for bare function name handling with function_tool_names."""

    def test_bare_name_creates_function_call_when_in_tool_names(self):
        """Bare function name matching a known tool creates function call."""
        message = Message.from_role_and_content(
            Role.ASSISTANT, '{"location": "San Francisco"}'
        )
        message = message.with_channel("commentary")
        message = message.with_recipient("get_weather")

        fn_names = frozenset({"get_weather"})
        output_items = harmony_to_response_output(message, fn_names)

        assert len(output_items) == 1
        assert isinstance(output_items[0], ResponseFunctionToolCall)
        assert output_items[0].type == "function_call"
        assert output_items[0].name == "get_weather"
        assert output_items[0].arguments == '{"location": "San Francisco"}'

    def test_bare_name_creates_mcp_call_when_not_in_tool_names(self):
        """Bare name not matching any known tool creates MCP call."""
        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        message = message.with_channel("commentary")
        message = message.with_recipient("custom_tool")

        fn_names = frozenset({"get_weather"})
        output_items = harmony_to_response_output(message, fn_names)

        assert len(output_items) == 1
        assert isinstance(output_items[0], McpCall)
        assert output_items[0].type == "mcp_call"

    def test_dotted_function_name_creates_function_call(self):
        """Dotted function name in tool names creates function call."""
        message = Message.from_role_and_content(Role.ASSISTANT, '{"a": 1, "b": 2}')
        message = message.with_channel("commentary")
        message = message.with_recipient("math.sum")

        fn_names = frozenset({"math.sum"})
        output_items = harmony_to_response_output(message, fn_names)

        assert len(output_items) == 1
        assert isinstance(output_items[0], ResponseFunctionToolCall)
        assert output_items[0].name == "math.sum"

    def test_empty_tool_names_defaults_to_mcp(self):
        """With empty function_tool_names, bare names become MCP calls."""
        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        message = message.with_channel("commentary")
        message = message.with_recipient("get_weather")

        output_items = harmony_to_response_output(message, frozenset())

        assert len(output_items) == 1
        assert isinstance(output_items[0], McpCall)

    def test_prefixed_name_always_function_call(self):
        """functions. prefix always creates function call even with empty tool names."""
        message = Message.from_role_and_content(Role.ASSISTANT, '{"arg": "value"}')
        message = message.with_channel("commentary")
        message = message.with_recipient("functions.get_weather")

        output_items = harmony_to_response_output(message, frozenset())

        assert len(output_items) == 1
        assert isinstance(output_items[0], ResponseFunctionToolCall)
        assert output_items[0].name == "get_weather"
class TestParserStateWithFunctionToolNames:
    """Tests for parser_state_to_response_output with function_tool_names."""

    def test_bare_name_creates_function_call(self):
        """An in-progress bare recipient in the allowed set is a function call."""
        from unittest.mock import Mock

        parser = Mock()
        parser.current_content = '{"arg": "value"}'
        parser.current_role = Role.ASSISTANT
        parser.current_channel = "commentary"
        parser.current_recipient = "get_weather"

        fn_names = frozenset({"get_weather"})
        items = parser_state_to_response_output(parser, fn_names)

        assert len(items) == 1
        assert isinstance(items[0], ResponseFunctionToolCall)
        assert items[0].name == "get_weather"
        assert items[0].status == "in_progress"

    def test_bare_name_creates_mcp_when_not_in_tool_names(self):
        """An in-progress bare recipient outside the allowed set is an MCP call."""
        from unittest.mock import Mock

        parser = Mock()
        parser.current_content = '{"arg": "value"}'
        parser.current_role = Role.ASSISTANT
        parser.current_channel = "commentary"
        parser.current_recipient = "unknown_tool"

        fn_names = frozenset({"get_weather"})
        items = parser_state_to_response_output(parser, fn_names)

        assert len(items) == 1
        assert isinstance(items[0], McpCall)
        assert items[0].name == "unknown_tool"
class TestToolCallsOnNonStandardChannels:
    """Tests verifying tool calls are detected regardless of channel."""

    def test_function_call_on_comment_channel(self):
        """A prefixed recipient on ``comment`` yields a function call item."""
        message = Message.from_role_and_content(Role.ASSISTANT, '{"query": "weather"}')
        message = message.with_channel("comment")
        message = message.with_recipient("functions.get_weather")

        output_items = harmony_to_response_output(message)

        assert len(output_items) == 1
        assert isinstance(output_items[0], ResponseFunctionToolCall)
        assert output_items[0].type == "function_call"
        assert output_items[0].name == "get_weather"

    def test_bare_function_on_comment_channel(self):
        """A bare allowed recipient on ``comment`` yields a function call item."""
        message = Message.from_role_and_content(Role.ASSISTANT, '{"query": "weather"}')
        message = message.with_channel("comment")
        message = message.with_recipient("get_weather")

        fn_names = frozenset({"get_weather"})
        output_items = harmony_to_response_output(message, fn_names)

        assert len(output_items) == 1
        assert isinstance(output_items[0], ResponseFunctionToolCall)
        assert output_items[0].name == "get_weather"

    def test_parser_state_comment_channel_function(self):
        """In-progress parser state on ``comment`` yields a function call."""
        from unittest.mock import Mock

        parser = Mock()
        parser.current_content = '{"arg": "value"}'
        parser.current_role = Role.ASSISTANT
        parser.current_channel = "comment"
        parser.current_recipient = "functions.get_weather"

        items = parser_state_to_response_output(parser)

        assert len(items) == 1
        assert isinstance(items[0], ResponseFunctionToolCall)
        assert items[0].name == "get_weather"

    def test_parser_state_comment_channel_mcp(self):
        """In-progress non-function recipient on ``comment`` yields an MCP call."""
        from unittest.mock import Mock

        parser = Mock()
        parser.current_content = '{"arg": "value"}'
        parser.current_role = Role.ASSISTANT
        parser.current_channel = "comment"
        parser.current_recipient = "mcp.server.tool"

        fn_names: frozenset[str] = frozenset()
        items = parser_state_to_response_output(parser, fn_names)

        assert len(items) == 1
        assert isinstance(items[0], McpCall)
def test_parse_mcp_call_basic() -> None:
"""Test that MCP calls are parsed with correct type and server_label."""
message = Message.from_role_and_content(Role.ASSISTANT, '{"path": "/tmp"}')
message = message.with_recipient("filesystem")
message = message.with_channel("commentary")
output_items = harmony_to_response_output(message)
fn_names: frozenset[str] = frozenset()
output_items = harmony_to_response_output(message, fn_names)
assert len(output_items) == 1
assert isinstance(output_items[0], McpCall)
@@ -309,7 +476,8 @@ def test_parse_mcp_call_dotted_recipient() -> None:
message = message.with_recipient("repo_browser.list")
message = message.with_channel("commentary")
output_items = harmony_to_response_output(message)
fn_names: frozenset[str] = frozenset()
output_items = harmony_to_response_output(message, fn_names)
assert len(output_items) == 1
assert isinstance(output_items[0], McpCall)
@@ -371,7 +539,8 @@ def test_parser_state_to_response_output_commentary_channel() -> None:
parser_mcp.current_channel = "commentary"
parser_mcp.current_recipient = "filesystem"
mcp_items = parser_state_to_response_output(parser_mcp)
fn_names: frozenset[str] = frozenset()
mcp_items = parser_state_to_response_output(parser_mcp, fn_names)
assert len(mcp_items) == 1
assert isinstance(mcp_items[0], McpCall)
@@ -438,7 +607,8 @@ def test_parser_state_to_response_output_analysis_channel() -> None:
parser_mcp.current_channel = "analysis"
parser_mcp.current_recipient = "database"
mcp_items = parser_state_to_response_output(parser_mcp)
fn_names: frozenset[str] = frozenset()
mcp_items = parser_state_to_response_output(parser_mcp, fn_names)
assert len(mcp_items) == 1
assert isinstance(mcp_items[0], McpCall)
@@ -217,6 +217,158 @@ def test_extract_tool_calls_multiple_tools(
assert extracted_info.content is None
def test_extract_tool_calls_bare_function_name(
    openai_tool_parser,
    harmony_encoding,
):
    """A commentary message to a bare recipient is extracted as a tool call.

    The recipient ``get_current_weather`` has no ``functions.`` prefix; the
    parser must still report one tool call and no content.
    """
    convo = Conversation.from_messages(
        [
            Message.from_role_and_content(Role.USER, "What is the weather in Tokyo?"),
            Message.from_role_and_content(
                Role.ASSISTANT,
                "We need to use get_current_weather tool.",
            ).with_channel("analysis"),
            Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
            .with_channel("commentary")
            .with_recipient("get_current_weather")
            .with_content_type("json"),
        ]
    )
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo, Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )

    assert extracted_info.tools_called
    expected_tool_calls = [
        ToolCall(
            function=FunctionCall(
                name="get_current_weather",
                arguments=json.dumps({"location": "Tokyo"}),
            )
        )
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
    assert extracted_info.content is None
def test_extract_tool_calls_bare_function_name_multiple(
    openai_tool_parser,
    harmony_encoding,
):
    """Two commentary messages to bare recipients yield two tool calls.

    Both calls must be returned, in the order they appear in the
    conversation.
    """
    convo = Conversation.from_messages(
        [
            Message.from_role_and_content(
                Role.USER, "What is the weather in Tokyo based on where I'm at?"
            ),
            Message.from_role_and_content(
                Role.ASSISTANT,
                "We need to use both tools.",
            ).with_channel("analysis"),
            Message.from_role_and_content(Role.ASSISTANT, '{"location": "Tokyo"}')
            .with_channel("commentary")
            .with_recipient("get_current_weather")
            .with_content_type("json"),
            Message.from_role_and_content(Role.ASSISTANT, "{}")
            .with_channel("commentary")
            .with_recipient("get_user_location")
            .with_content_type("json"),
        ]
    )
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo, Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )

    assert extracted_info.tools_called
    expected_tool_calls = [
        ToolCall(
            function=FunctionCall(
                name="get_current_weather",
                arguments=json.dumps({"location": "Tokyo"}),
            )
        ),
        ToolCall(
            function=FunctionCall(
                name="get_user_location",
                arguments=json.dumps({}),
            )
        ),
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
def test_extract_tool_calls_assistant_recipient_ignored(
    openai_tool_parser,
    harmony_encoding,
):
    """A message addressed to ``assistant`` is not reported as a tool call.

    Only the ``final`` channel text is expected back, as content.
    """
    convo = Conversation.from_messages(
        [
            Message.from_role_and_content(Role.USER, "Hello"),
            Message.from_role_and_content(Role.ASSISTANT, "Some tool response")
            .with_channel("commentary")
            .with_recipient("assistant"),
            Message.from_role_and_content(
                Role.ASSISTANT, "Here is the answer"
            ).with_channel("final"),
        ]
    )
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo, Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )

    assert not extracted_info.tools_called
    assert extracted_info.tool_calls == []
    assert extracted_info.content == "Here is the answer"
def test_extract_tool_calls_dotted_function_name(
    openai_tool_parser,
    harmony_encoding,
):
    """A dotted bare recipient is extracted with its full dotted name.

    ``math.sum`` must be reported as the function name unchanged.
    """
    convo = Conversation.from_messages(
        [
            Message.from_role_and_content(Role.USER, "Compute 2+3"),
            Message.from_role_and_content(Role.ASSISTANT, '{"a": 2, "b": 3}')
            .with_channel("commentary")
            .with_recipient("math.sum")
            .with_content_type("json"),
        ]
    )
    token_ids = harmony_encoding.render_conversation_for_completion(
        convo, Role.ASSISTANT
    )

    extracted_info = openai_tool_parser.extract_tool_calls(
        "",
        request=None,
        token_ids=token_ids,
    )

    assert extracted_info.tools_called
    expected_tool_calls = [
        ToolCall(
            function=FunctionCall(
                name="math.sum",
                arguments=json.dumps({"a": 2, "b": 3}),
            )
        )
    ]
    assert_tool_calls(extracted_info.tool_calls, expected_tool_calls)
def test_extract_tool_calls_with_content(
openai_tool_parser,
harmony_encoding,
@@ -17,6 +17,10 @@ from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
DeltaToolCall,
)
from vllm.entrypoints.openai.parser.harmony_utils import (
extract_function_from_recipient,
is_function_recipient,
)
class TokenState(NamedTuple):
@@ -79,16 +83,12 @@ def extract_harmony_streaming_delta(
# This counts completed tool calls in messages
base_index = 0
for msg in harmony_parser.messages:
if (
(msg.channel == "commentary" or msg.channel == "analysis")
and msg.recipient
and msg.recipient.startswith("functions.")
):
if msg.recipient and is_function_recipient(msg.recipient):
base_index += 1
# If there's an ongoing tool call from previous chunk,
# the next new tool call starts at base_index + 1
if prev_recipient and prev_recipient.startswith("functions."):
if prev_recipient and is_function_recipient(prev_recipient):
next_tool_index = base_index + 1
# Ongoing call is at base_index
ongoing_tool_index = base_index
@@ -101,15 +101,11 @@ def extract_harmony_streaming_delta(
if group.channel == "final":
combined_content += group.text
content_encountered = True
elif (
(group.channel == "commentary" or group.channel == "analysis")
and group.recipient
and group.recipient.startswith("functions.")
):
elif group.recipient and is_function_recipient(group.recipient):
opened_new_call = False
if prev_recipient != group.recipient:
# New tool call - emit the opening message
tool_name = group.recipient.split("functions.", 1)[1]
tool_name = extract_function_from_recipient(group.recipient)
tool_messages.append(
DeltaToolCall(
id=make_tool_call_id(),
@@ -26,6 +26,42 @@ from vllm.logger import init_logger
logger = init_logger(__name__)
def is_function_recipient(
    recipient: str | None,
    allowed_function_tool_names: frozenset[str] | None = None,
) -> bool:
    """Check whether *recipient* refers to a function tool call.

    The optional *allowed_function_tool_names* parameter is used by the
    Responses API to distinguish bare function-call recipients (missing the
    ``functions.`` prefix) from MCP tool calls. When provided, a bare
    recipient is only treated as a function call if it appears in the set.
    The Chat Completions path omits this parameter so that all bare
    recipients are accepted as function calls (the heuristic fallback).

    The rules are applied in order and the first match decides:

    1. ``None`` or an empty string is not a function call.
    2. A recipient starting with ``<|`` (e.g. ``<|start|>``) is rejected.
    3. A ``functions.`` prefix is accepted only if a name follows it; this
       holds regardless of *allowed_function_tool_names*.
    4. ``assistant`` is rejected.
    5. A recipient that is a key of ``BUILTIN_TOOL_TO_MCP_SERVER_LABEL``, or
       whose first dot-separated segment is, is rejected, even when it is
       listed in *allowed_function_tool_names*.
    6. If *allowed_function_tool_names* is given, the full recipient string
       must be a member of it.
    7. Otherwise the recipient is accepted.

    Args:
        recipient: Recipient string of a Harmony message, or ``None``.
        allowed_function_tool_names: Names of the function tools known for
            this request, or ``None`` to accept any bare recipient.

    Returns:
        ``True`` if the recipient should be treated as a function call.
    """
    prefix = "functions."

    if not recipient:
        return False
    if recipient.startswith("<|"):
        return False
    if recipient.startswith(prefix):
        # A bare "functions." carries no function name.
        return len(recipient) > len(prefix)
    if recipient == "assistant":
        return False

    # Built-in tools are matched both by full name and by namespace
    # (text before the first dot), e.g. "python" and "python.run".
    namespace = recipient.partition(".")[0]
    if (
        recipient in BUILTIN_TOOL_TO_MCP_SERVER_LABEL
        or namespace in BUILTIN_TOOL_TO_MCP_SERVER_LABEL
    ):
        return False

    if allowed_function_tool_names is not None:
        return recipient in allowed_function_tool_names
    return True
def extract_function_from_recipient(recipient: str) -> str:
    """Return the function name carried by *recipient*.

    A single leading ``functions.`` prefix is removed when present; any
    other recipient is returned unchanged. Dots inside the remaining name
    are preserved, so ``functions.math.sum`` becomes ``math.sum``. A bare
    ``functions.`` yields an empty string.

    Args:
        recipient: Recipient string of a Harmony message.

    Returns:
        The recipient without its ``functions.`` prefix.
    """
    return recipient.removeprefix("functions.")
REASONING_EFFORT = {
"high": ReasoningEffort.HIGH,
"medium": ReasoningEffort.MEDIUM,
@@ -524,10 +524,12 @@ class HarmonyContext(ConversationContext):
self,
messages: list,
available_tools: list[str],
function_tool_names: frozenset[str] | None = None,
):
self._messages = messages
self.finish_reason: str | None = None
self.available_tools = available_tools
self.function_tool_names = function_tool_names
self._tool_sessions: dict[str, ClientSession | Tool] = {}
self.called_tools: set[str] = set()
+13 -7
View File
@@ -31,7 +31,9 @@ from openai_harmony import Author, Message, Role, StreamableParser, TextContent
from vllm.entrypoints.openai.parser.harmony_utils import (
BUILTIN_TOOL_TO_MCP_SERVER_LABEL,
extract_function_from_recipient,
flatten_chat_text_content,
is_function_recipient,
)
from vllm.entrypoints.openai.responses.protocol import (
ResponseInputOutputItem,
@@ -294,7 +296,7 @@ def _parse_browser_tool_call(message: Message, recipient: str) -> ResponseOutput
def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutputItem]:
"""Parse function calls into function tool call items."""
function_name = recipient.split(".")[-1]
function_name = extract_function_from_recipient(recipient)
output_items = []
for content in message.content:
random_id = random_uuid()
@@ -410,7 +412,10 @@ def _parse_message_no_recipient(
# ---------------------------------------------------------------------------
def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
def harmony_to_response_output(
message: Message,
function_tool_names: frozenset[str] | None = None,
) -> list[ResponseOutputItem]:
"""Parse a Harmony message into a list of output response items.
This is the main dispatcher that routes based on channel and recipient.
@@ -429,8 +434,8 @@ def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
if recipient.startswith("browser."):
output_items.append(_parse_browser_tool_call(message, recipient))
# Function calls (should only happen on commentary channel)
elif message.channel == "commentary" and recipient.startswith("functions."):
# Function calls (with or without "functions." prefix)
elif is_function_recipient(recipient, function_tool_names):
output_items.extend(_parse_function_call(message, recipient))
# Built-in MCP tools (python, browser, container)
@@ -450,6 +455,7 @@ def harmony_to_response_output(message: Message) -> list[ResponseOutputItem]:
def parser_state_to_response_output(
parser: StreamableParser,
function_tool_names: frozenset[str] | None = None,
) -> list[ResponseOutputItem]:
"""Extract in-progress response items from incomplete parser state.
@@ -464,15 +470,15 @@ def parser_state_to_response_output(
if current_recipient is not None and current_recipient.startswith("browser."):
return []
if current_recipient and parser.current_channel in ("commentary", "analysis"):
if current_recipient.startswith("functions."):
if current_recipient:
if is_function_recipient(current_recipient, function_tool_names):
rid = random_uuid()
return [
ResponseFunctionToolCall(
arguments=parser.current_content,
call_id=f"call_{rid}",
type="function_call",
name=current_recipient.split(".")[-1],
name=extract_function_from_recipient(current_recipient),
id=f"fc_{rid}",
status="in_progress",
)
+14 -5
View File
@@ -89,6 +89,7 @@ from vllm.entrypoints.openai.responses.streaming_events import (
from vllm.entrypoints.openai.responses.utils import (
construct_input_messages,
construct_tool_dicts,
extract_function_tool_names,
extract_tool_types,
)
from vllm.entrypoints.serve.render.serving import OpenAIServingRender
@@ -449,11 +450,16 @@ class OpenAIServingResponses(OpenAIServing):
)
context: ConversationContext
function_tool_names = extract_function_tool_names(request.tools)
if self.use_harmony:
if request.stream:
context = StreamingHarmonyContext(messages, available_tools)
context = StreamingHarmonyContext(
messages, available_tools, function_tool_names
)
else:
context = HarmonyContext(messages, available_tools)
context = HarmonyContext(
messages, available_tools, function_tool_names
)
else:
if envs.VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT:
# This is a feature in development for parsing
@@ -1070,10 +1076,11 @@ class OpenAIServingResponses(OpenAIServing):
) -> list[ResponseOutputItem]:
output_items: list[ResponseOutputItem] = []
num_init_messages = context.num_init_messages
fn_names = context.function_tool_names
for msg in context.messages[num_init_messages:]:
output_items.extend(harmony_to_response_output(msg))
output_items.extend(harmony_to_response_output(msg, fn_names))
# Handle the generation stopped in the middle (if any).
last_items = parser_state_to_response_output(context.parser)
last_items = parser_state_to_response_output(context.parser, fn_names)
if last_items:
output_items.extend(last_items)
return output_items
@@ -1448,7 +1455,9 @@ class OpenAIServingResponses(OpenAIServing):
if ctx.is_expecting_start():
if len(ctx.parser.messages) > 0:
previous_item = ctx.parser.messages[-1]
for event in emit_previous_item_done_events(previous_item, state):
for event in emit_previous_item_done_events(
previous_item, state, ctx.function_tool_names
):
yield _increment_sequence_number_and_return(event)
state.reset_for_new_item()
@@ -62,6 +62,10 @@ from openai_harmony import Message as HarmonyMessage
from vllm.entrypoints.mcp.tool_server import ToolServer
from vllm.entrypoints.openai.engine.protocol import DeltaMessage
from vllm.entrypoints.openai.parser.harmony_utils import (
extract_function_from_recipient,
is_function_recipient,
)
from vllm.entrypoints.openai.responses.context import StreamingHarmonyContext
from vllm.entrypoints.openai.responses.protocol import (
ResponseReasoningPartAddedEvent,
@@ -111,19 +115,19 @@ class StreamingState:
self.current_call_id = ""
def is_mcp_tool_by_namespace(
    recipient: str | None,
    allowed_function_tool_names: frozenset[str] | None = None,
) -> bool:
    """
    Determine if a tool call is an MCP tool based on recipient prefix.

    Inverse of :func:`is_function_recipient` — everything that is not
    a function call is an MCP tool.

    Args:
        recipient: Recipient string of the message, or ``None`` when the
            message has no recipient.
        allowed_function_tool_names: Forwarded unchanged to
            :func:`is_function_recipient`.

    Returns:
        ``False`` when *recipient* is ``None``; otherwise the negation of
        ``is_function_recipient(recipient, allowed_function_tool_names)``.
    """
    if recipient is None:
        return False
    return not is_function_recipient(recipient, allowed_function_tool_names)
# =====================================================================
@@ -575,16 +579,16 @@ def emit_content_delta_events(
return emit_text_delta_events(delta, state)
elif channel == "analysis" and recipient is None:
return emit_reasoning_delta_events(delta, state)
# built-in tools will be triggered on the analysis channel
# However, occasionally built-in tools will
# still be output to commentary.
elif channel in ("commentary", "analysis") and recipient is not None:
if recipient.startswith("functions."):
function_name = recipient[len("functions.") :]
elif recipient is not None:
fn_names = ctx.function_tool_names
if is_function_recipient(recipient, fn_names):
function_name = extract_function_from_recipient(recipient)
return emit_function_call_delta_events(delta, function_name, state)
elif recipient == "python":
return emit_code_interpreter_delta_events(delta, state)
elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(recipient):
elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(
recipient, fn_names
):
return emit_mcp_delta_events(delta, state, recipient)
return []
@@ -593,6 +597,7 @@ def emit_content_delta_events(
def emit_previous_item_done_events(
previous_item: HarmonyMessage,
state: StreamingState,
function_tool_names: frozenset[str] | None = None,
) -> list[StreamingResponsesResponse]:
"""Emit done events for the previous item when expecting a new start.
@@ -602,13 +607,13 @@ def emit_previous_item_done_events(
text = previous_item.content[0].text
if previous_item.recipient is not None:
# Deal with tool call
if previous_item.recipient.startswith("functions."):
function_name = previous_item.recipient[len("functions.") :]
if is_function_recipient(previous_item.recipient, function_tool_names):
function_name = extract_function_from_recipient(previous_item.recipient)
return emit_function_call_done_events(function_name, text, state)
elif previous_item.recipient == "python":
return emit_code_interpreter_completion_events(previous_item, state)
elif (
is_mcp_tool_by_namespace(previous_item.recipient)
is_mcp_tool_by_namespace(previous_item.recipient, function_tool_names)
and state.current_item_id is not None
and state.current_item_id.startswith("mcp_")
):
@@ -792,9 +797,12 @@ def emit_tool_action_events(
and state.sent_output_item_added
):
recipient = previous_item.recipient
fn_names = ctx.function_tool_names
if recipient == "python":
events.extend(emit_code_interpreter_completion_events(previous_item, state))
elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(recipient):
elif recipient.startswith("mcp.") or is_mcp_tool_by_namespace(
recipient, fn_names
):
events.extend(
emit_mcp_completion_events(
recipient, previous_item.content[0].text, state
@@ -236,6 +236,10 @@ def _construct_message_from_response_item(
return item # type: ignore[arg-type]
def extract_function_tool_names(tools: list[Tool]) -> frozenset[str]:
    """Collect the names of all function-type tools in ``tools``.

    Only entries whose ``type`` equals ``"function"`` contribute; the
    ``name`` attribute is not read for any other entry.

    Args:
        tools: Tool definitions to scan.

    Returns:
        An immutable set of the matching tools' names. Duplicate names
        collapse into one entry and the input order is not preserved.
    """
    return frozenset(tool.name for tool in tools if tool.type == "function")
def extract_tool_types(tools: list[Tool]) -> set[str]:
"""
Extracts the tool types from the given tools.
+9 -3
View File
@@ -13,7 +13,11 @@ from vllm.entrypoints.openai.engine.protocol import (
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.parser.harmony_utils import parse_output_into_messages
from vllm.entrypoints.openai.parser.harmony_utils import (
extract_function_from_recipient,
is_function_recipient,
parse_output_into_messages,
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
Tool,
@@ -50,10 +54,12 @@ class OpenAIToolParser(ToolParser):
if len(parser.messages) > 0:
for msg in parser.messages:
if msg.author.role != "assistant":
continue
if len(msg.content) < 1:
continue
msg_text = msg.content[0].text
if msg.recipient and msg.recipient.startswith("functions."):
if msg.recipient and is_function_recipient(msg.recipient):
# If no content-type is given assume JSON, as that's the
# most common case with gpt-oss models.
if not msg.content_type or "json" in msg.content_type:
@@ -72,7 +78,7 @@ class OpenAIToolParser(ToolParser):
ToolCall(
type="function",
function=FunctionCall(
name=msg.recipient.split("functions.")[1],
name=extract_function_from_recipient(msg.recipient),
arguments=tool_args,
),
)